Add ParaformerDecoder_int8 (int8, ~half size, accuracy-neutral)

01f0617 verified 9 days ago

310 kB

	program(1.0)
	[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})]
	{
	func main<ios17>(tensor<fp32, [1, 128, 512]> ac, tensor<int32, [1]> elen, tensor<fp32, [1, 512, 512]> enc, tensor<int32, [1]> tn) {
	tensor<int32, []> var_20 = const()[name = tensor<string, []>("op_20"), val = tensor<int32, []>(-1)];
	tensor<int32, [128]> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<int32, [128]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127])];
	tensor<int32, [1]> matrix_1_axes_0 = const()[name = tensor<string, []>("matrix_1_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<int32, [1, 1]> matrix_1 = expand_dims(axes = matrix_1_axes_0, x = tn)[name = tensor<string, []>("matrix_1")];
	tensor<bool, [1, 128]> mask_1 = less(x = const_1, y = matrix_1)[name = tensor<string, []>("mask_1")];
	tensor<int32, [1]> mask_9_axes_0 = const()[name = tensor<string, []>("mask_9_axes_0"), val = tensor<int32, [1]>([2])];
	tensor<string, []> cast_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("cast_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
	tensor<fp16, [1, 128]> mask_1_to_fp16 = cast(dtype = cast_0_to_fp16_dtype_0, x = mask_1)[name = tensor<string, []>("cast_3")];
	tensor<fp16, [1, 128, 1]> mask_9_cast_fp16 = expand_dims(axes = mask_9_axes_0, x = mask_1_to_fp16)[name = tensor<string, []>("mask_9_cast_fp16")];
	tensor<int32, [512]> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<int32, [512]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])];
	tensor<int32, [1]> matrix_axes_0 = const()[name = tensor<string, []>("matrix_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<int32, [1, 1]> matrix = expand_dims(axes = matrix_axes_0, x = elen)[name = tensor<string, []>("matrix")];
	tensor<bool, [1, 512]> mask_5 = less(x = const_3, y = matrix)[name = tensor<string, []>("mask_5")];
	tensor<int32, [1]> var_51_axes_0 = const()[name = tensor<string, []>("op_51_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<string, []> cast_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("cast_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
	tensor<fp16, [1, 512]> mask_5_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = mask_5)[name = tensor<string, []>("cast_2")];
	tensor<fp16, [1, 1, 512]> var_51_cast_fp16 = expand_dims(axes = var_51_axes_0, x = mask_5_to_fp16)[name = tensor<string, []>("op_51_cast_fp16")];
	tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<string, []> ac_to_fp16_dtype_0 = const()[name = tensor<string, []>("ac_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
	tensor<fp16, [512]> d_decoders_0_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
	tensor<fp16, [512]> d_decoders_0_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1152)))];
	tensor<fp16, []> var_15_to_fp16 = const()[name = tensor<string, []>("op_15_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
	tensor<fp16, [1, 128, 512]> ac_to_fp16 = cast(dtype = ac_to_fp16_dtype_0, x = ac)[name = tensor<string, []>("cast_1")];
	tensor<fp16, [1, 128, 512]> input_1_cast_fp16 = layer_norm(axes = input_1_axes_0, beta = d_decoders_0_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_norm1_weight_to_fp16, x = ac_to_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_0_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_0_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2240))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1052992))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_0_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1057152)))];
	tensor<fp16, [1, 128, 2048]> linear_0_cast_fp16 = linear(bias = d_decoders_0_feed_forward_w_1_bias_to_fp16, weight = d_decoders_0_feed_forward_w_1_weight_to_fp16_quantized, x = input_1_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_5_cast_fp16 = relu(x = linear_0_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
	tensor<int32, [1]> input_9_axes_0 = const()[name = tensor<string, []>("input_9_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_0_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1061312)))];
	tensor<fp16, [2048]> d_decoders_0_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1065472)))];
	tensor<fp16, [1, 128, 2048]> input_9_cast_fp16 = layer_norm(axes = input_9_axes_0, beta = d_decoders_0_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_feed_forward_norm_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_0_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_0_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1069632))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118848))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2119936)))];
	tensor<fp16, [1, 128, 512]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_0_feed_forward_w_2_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
	tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_0_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2121024)))];
	tensor<fp16, [512]> d_decoders_0_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2122112)))];
	tensor<fp16, [1, 128, 512]> inputs_1_cast_fp16 = layer_norm(axes = inputs_1_axes_0, beta = d_decoders_0_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_norm2_weight_to_fp16, x = linear_1_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_3_cast_fp16 = mul(x = inputs_1_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
	tensor<int32, [3]> x_1_perm_0 = const()[name = tensor<string, []>("x_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_13_pad_0 = const()[name = tensor<string, []>("input_13_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_13_mode_0 = const()[name = tensor<string, []>("input_13_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_5_to_fp16 = const()[name = tensor<string, []>("const_5_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_1_cast_fp16 = transpose(perm = x_1_perm_0, x = inputs_3_cast_fp16)[name = tensor<string, []>("transpose_191")];
	tensor<fp16, [1, 512, 138]> input_13_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = input_13_mode_0, pad = input_13_pad_0, x = x_1_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
	tensor<string, []> x_3_pad_type_0 = const()[name = tensor<string, []>("x_3_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_3_groups_0 = const()[name = tensor<string, []>("x_3_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_3_strides_0 = const()[name = tensor<string, []>("x_3_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_3_pad_0 = const()[name = tensor<string, []>("x_3_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_3_dilations_0 = const()[name = tensor<string, []>("x_3_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_0_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_0_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2123200))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2128896))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_3_cast_fp16 = conv(dilations = x_3_dilations_0, groups = x_3_groups_0, pad = x_3_pad_0, pad_type = x_3_pad_type_0, strides = x_3_strides_0, weight = d_decoders_0_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
	tensor<int32, [3]> x_5_perm_0 = const()[name = tensor<string, []>("x_5_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_5_cast_fp16 = transpose(perm = x_5_perm_0, x = x_3_cast_fp16)[name = tensor<string, []>("transpose_190")];
	tensor<fp16, [1, 128, 512]> input_15_cast_fp16 = add(x = x_5_cast_fp16, y = inputs_3_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_17_cast_fp16 = mul(x = input_15_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_19_cast_fp16 = add(x = ac_to_fp16, y = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
	tensor<int32, [1]> x_11_axes_0 = const()[name = tensor<string, []>("x_11_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_0_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2129984)))];
	tensor<fp16, [512]> d_decoders_0_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2131072)))];
	tensor<fp16, [1, 128, 512]> x_11_cast_fp16 = layer_norm(axes = x_11_axes_0, beta = d_decoders_0_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_norm3_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_0_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_0_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2132160))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2394368))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_0_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2395456)))];
	tensor<fp16, [1, 128, 512]> linear_2_cast_fp16 = linear(bias = d_decoders_0_src_attn_linear_q_bias_to_fp16, weight = d_decoders_0_src_attn_linear_q_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
	tensor<int32, [4]> var_128 = const()[name = tensor<string, []>("op_128"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_129_cast_fp16 = reshape(shape = var_128, x = linear_2_cast_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
	tensor<string, []> enc_to_fp16_dtype_0 = const()[name = tensor<string, []>("enc_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
	tensor<fp16, [1024, 512]> d_decoders_0_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_0_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2396544))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2921984))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_0_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2924096)))];
	tensor<fp16, [1, 512, 512]> enc_to_fp16 = cast(dtype = enc_to_fp16_dtype_0, x = enc)[name = tensor<string, []>("cast_0")];
	tensor<fp16, [1, 512, 1024]> linear_3_cast_fp16 = linear(bias = d_decoders_0_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_0_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
	tensor<int32, [2]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_134_axis_0 = const()[name = tensor<string, []>("op_134_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_134_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_134_cast_fp16_1 = split(axis = var_134_axis_0, split_sizes = tile_0, x = linear_3_cast_fp16)[name = tensor<string, []>("op_134_cast_fp16")];
	tensor<int32, [4]> var_137 = const()[name = tensor<string, []>("op_137"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_138_cast_fp16 = reshape(shape = var_137, x = var_134_cast_fp16_0)[name = tensor<string, []>("op_138_cast_fp16")];
	tensor<int32, [4]> var_140 = const()[name = tensor<string, []>("op_140"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_141_cast_fp16 = reshape(shape = var_140, x = var_134_cast_fp16_1)[name = tensor<string, []>("op_141_cast_fp16")];
	tensor<int32, [4]> value_1_perm_0 = const()[name = tensor<string, []>("value_1_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_143_to_fp16 = const()[name = tensor<string, []>("op_143_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_3_cast_fp16 = mul(x = var_129_cast_fp16, y = var_143_to_fp16)[name = tensor<string, []>("q_h_3_cast_fp16")];
	tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_64_perm_0 = const()[name = tensor<string, []>("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_65_perm_0 = const()[name = tensor<string, []>("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_65 = transpose(perm = transpose_65_perm_0, x = var_138_cast_fp16)[name = tensor<string, []>("transpose_188")];
	tensor<fp16, [1, 4, 128, 128]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_h_3_cast_fp16)[name = tensor<string, []>("transpose_189")];
	tensor<fp16, [1, 4, 128, 512]> scores_1_cast_fp16 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_64, y = transpose_65)[name = tensor<string, []>("scores_1_cast_fp16")];
	tensor<int32, [1]> var_148_axes_0 = const()[name = tensor<string, []>("op_148_axes_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [1, 1, 1, 512]> var_148_cast_fp16 = expand_dims(axes = var_148_axes_0, x = var_51_cast_fp16)[name = tensor<string, []>("op_148_cast_fp16")];
	tensor<fp16, []> var_25_promoted_to_fp16 = const()[name = tensor<string, []>("op_25_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<bool, [1, 1, 1, 512]> mask_15_cast_fp16 = equal(x = var_148_cast_fp16, y = var_25_promoted_to_fp16)[name = tensor<string, []>("mask_15_cast_fp16")];
	tensor<fp16, []> var_8_to_fp16 = const()[name = tensor<string, []>("op_8_to_fp16"), val = tensor<fp16, []>(-inf)];
	tensor<fp16, [1, 4, 128, 512]> scores_3_cast_fp16 = select(a = var_8_to_fp16, b = scores_1_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_3_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_151_cast_fp16 = softmax(axis = var_20, x = scores_3_cast_fp16)[name = tensor<string, []>("op_151_cast_fp16")];
	tensor<fp16, []> var_9_to_fp16 = const()[name = tensor<string, []>("op_9_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 4, 128, 512]> input_21_cast_fp16 = select(a = var_9_to_fp16, b = var_151_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
	tensor<bool, []> x_13_transpose_x_0 = const()[name = tensor<string, []>("x_13_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_13_transpose_y_0 = const()[name = tensor<string, []>("x_13_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_1_cast_fp16 = transpose(perm = value_1_perm_0, x = var_141_cast_fp16)[name = tensor<string, []>("transpose_187")];
	tensor<fp16, [1, 4, 128, 128]> x_13_cast_fp16 = matmul(transpose_x = x_13_transpose_x_0, transpose_y = x_13_transpose_y_0, x = input_21_cast_fp16, y = value_1_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")];
	tensor<int32, [4]> var_155_perm_0 = const()[name = tensor<string, []>("op_155_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_157 = const()[name = tensor<string, []>("op_157"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_155_cast_fp16 = transpose(perm = var_155_perm_0, x = x_13_cast_fp16)[name = tensor<string, []>("transpose_186")];
	tensor<fp16, [1, 128, 512]> input_23_cast_fp16 = reshape(shape = var_157, x = var_155_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_0_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_0_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2926208))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3188416))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_0_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3189504)))];
	tensor<fp16, [1, 128, 512]> linear_4_cast_fp16 = linear(bias = d_decoders_0_src_attn_linear_out_bias_to_fp16, weight = d_decoders_0_src_attn_linear_out_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_27_cast_fp16 = add(x = input_19_cast_fp16, y = linear_4_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
	tensor<int32, [1]> input_29_axes_0 = const()[name = tensor<string, []>("input_29_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_1_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3190592)))];
	tensor<fp16, [512]> d_decoders_1_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3191680)))];
	tensor<fp16, [1, 128, 512]> input_29_cast_fp16 = layer_norm(axes = input_29_axes_0, beta = d_decoders_1_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_norm1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_1_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_1_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3192768))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4241408))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_1_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4245568)))];
	tensor<fp16, [1, 128, 2048]> linear_5_cast_fp16 = linear(bias = d_decoders_1_feed_forward_w_1_bias_to_fp16, weight = d_decoders_1_feed_forward_w_1_weight_to_fp16_quantized, x = input_29_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_33_cast_fp16 = relu(x = linear_5_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
	tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_1_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4249728)))];
	tensor<fp16, [2048]> d_decoders_1_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4253888)))];
	tensor<fp16, [1, 128, 2048]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = d_decoders_1_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_feed_forward_norm_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_1_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_1_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4258048))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5306688))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_1_feed_forward_w_2_weight_to_fp16_quantized, x = input_37_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
	tensor<int32, [1]> inputs_5_axes_0 = const()[name = tensor<string, []>("inputs_5_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_1_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5307776)))];
	tensor<fp16, [512]> d_decoders_1_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5308864)))];
	tensor<fp16, [1, 128, 512]> inputs_5_cast_fp16 = layer_norm(axes = inputs_5_axes_0, beta = d_decoders_1_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_norm2_weight_to_fp16, x = linear_6_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_7_cast_fp16 = mul(x = inputs_5_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
	tensor<int32, [3]> x_15_perm_0 = const()[name = tensor<string, []>("x_15_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_41_pad_0 = const()[name = tensor<string, []>("input_41_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_11_to_fp16 = const()[name = tensor<string, []>("const_11_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_15_cast_fp16 = transpose(perm = x_15_perm_0, x = inputs_7_cast_fp16)[name = tensor<string, []>("transpose_185")];
	tensor<fp16, [1, 512, 138]> input_41_cast_fp16 = pad(constant_val = const_11_to_fp16, mode = input_41_mode_0, pad = input_41_pad_0, x = x_15_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
	tensor<string, []> x_17_pad_type_0 = const()[name = tensor<string, []>("x_17_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_17_groups_0 = const()[name = tensor<string, []>("x_17_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_17_strides_0 = const()[name = tensor<string, []>("x_17_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_17_pad_0 = const()[name = tensor<string, []>("x_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_17_dilations_0 = const()[name = tensor<string, []>("x_17_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_1_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_1_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5309952))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5315648))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_17_cast_fp16 = conv(dilations = x_17_dilations_0, groups = x_17_groups_0, pad = x_17_pad_0, pad_type = x_17_pad_type_0, strides = x_17_strides_0, weight = d_decoders_1_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
	tensor<int32, [3]> x_19_perm_0 = const()[name = tensor<string, []>("x_19_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_19_cast_fp16 = transpose(perm = x_19_perm_0, x = x_17_cast_fp16)[name = tensor<string, []>("transpose_184")];
	tensor<fp16, [1, 128, 512]> input_43_cast_fp16 = add(x = x_19_cast_fp16, y = inputs_7_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_45_cast_fp16 = mul(x = input_43_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_47_cast_fp16 = add(x = input_27_cast_fp16, y = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
	tensor<int32, [1]> x_25_axes_0 = const()[name = tensor<string, []>("x_25_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_1_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5316736)))];
	tensor<fp16, [512]> d_decoders_1_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5317824)))];
	tensor<fp16, [1, 128, 512]> x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, beta = d_decoders_1_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_norm3_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_1_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_1_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5318912))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5581120))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_1_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5582208)))];
	tensor<fp16, [1, 128, 512]> linear_7_cast_fp16 = linear(bias = d_decoders_1_src_attn_linear_q_bias_to_fp16, weight = d_decoders_1_src_attn_linear_q_weight_to_fp16_quantized, x = x_25_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
	tensor<int32, [4]> var_223 = const()[name = tensor<string, []>("op_223"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_224_cast_fp16 = reshape(shape = var_223, x = linear_7_cast_fp16)[name = tensor<string, []>("op_224_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_1_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_1_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5583296))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6107648))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_1_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6109760)))];
	tensor<fp16, [1, 512, 1024]> linear_8_cast_fp16 = linear(bias = d_decoders_1_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_1_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
	tensor<int32, [2]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_229_axis_0 = const()[name = tensor<string, []>("op_229_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_229_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_229_cast_fp16_1 = split(axis = var_229_axis_0, split_sizes = tile_1, x = linear_8_cast_fp16)[name = tensor<string, []>("op_229_cast_fp16")];
	tensor<int32, [4]> var_232 = const()[name = tensor<string, []>("op_232"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_233_cast_fp16 = reshape(shape = var_232, x = var_229_cast_fp16_0)[name = tensor<string, []>("op_233_cast_fp16")];
	tensor<int32, [4]> var_235 = const()[name = tensor<string, []>("op_235"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_236_cast_fp16 = reshape(shape = var_235, x = var_229_cast_fp16_1)[name = tensor<string, []>("op_236_cast_fp16")];
	tensor<int32, [4]> value_3_perm_0 = const()[name = tensor<string, []>("value_3_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_238_to_fp16 = const()[name = tensor<string, []>("op_238_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_7_cast_fp16 = mul(x = var_224_cast_fp16, y = var_238_to_fp16)[name = tensor<string, []>("q_h_7_cast_fp16")];
	tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_66_perm_0 = const()[name = tensor<string, []>("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_67_perm_0 = const()[name = tensor<string, []>("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_67 = transpose(perm = transpose_67_perm_0, x = var_233_cast_fp16)[name = tensor<string, []>("transpose_182")];
	tensor<fp16, [1, 4, 128, 128]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_h_7_cast_fp16)[name = tensor<string, []>("transpose_183")];
	tensor<fp16, [1, 4, 128, 512]> scores_5_cast_fp16 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_66, y = transpose_67)[name = tensor<string, []>("scores_5_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_7_cast_fp16 = select(a = var_8_to_fp16, b = scores_5_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_7_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_246_cast_fp16 = softmax(axis = var_20, x = scores_7_cast_fp16)[name = tensor<string, []>("op_246_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_49_cast_fp16 = select(a = var_9_to_fp16, b = var_246_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
	tensor<bool, []> x_27_transpose_x_0 = const()[name = tensor<string, []>("x_27_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_27_transpose_y_0 = const()[name = tensor<string, []>("x_27_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_3_cast_fp16 = transpose(perm = value_3_perm_0, x = var_236_cast_fp16)[name = tensor<string, []>("transpose_181")];
	tensor<fp16, [1, 4, 128, 128]> x_27_cast_fp16 = matmul(transpose_x = x_27_transpose_x_0, transpose_y = x_27_transpose_y_0, x = input_49_cast_fp16, y = value_3_cast_fp16)[name = tensor<string, []>("x_27_cast_fp16")];
	tensor<int32, [4]> var_250_perm_0 = const()[name = tensor<string, []>("op_250_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_252 = const()[name = tensor<string, []>("op_252"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_250_cast_fp16 = transpose(perm = var_250_perm_0, x = x_27_cast_fp16)[name = tensor<string, []>("transpose_180")];
	tensor<fp16, [1, 128, 512]> input_51_cast_fp16 = reshape(shape = var_252, x = var_250_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_1_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_1_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6111872))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6374080))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_1_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6375168)))];
	tensor<fp16, [1, 128, 512]> linear_9_cast_fp16 = linear(bias = d_decoders_1_src_attn_linear_out_bias_to_fp16, weight = d_decoders_1_src_attn_linear_out_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_55_cast_fp16 = add(x = input_47_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
	tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_2_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6376256)))];
	tensor<fp16, [512]> d_decoders_2_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6377344)))];
	tensor<fp16, [1, 128, 512]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = d_decoders_2_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_norm1_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_2_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_2_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6378432))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7427072))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_2_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7431232)))];
	tensor<fp16, [1, 128, 2048]> linear_10_cast_fp16 = linear(bias = d_decoders_2_feed_forward_w_1_bias_to_fp16, weight = d_decoders_2_feed_forward_w_1_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_61_cast_fp16 = relu(x = linear_10_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
	tensor<int32, [1]> input_65_axes_0 = const()[name = tensor<string, []>("input_65_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_2_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7435392)))];
	tensor<fp16, [2048]> d_decoders_2_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7439552)))];
	tensor<fp16, [1, 128, 2048]> input_65_cast_fp16 = layer_norm(axes = input_65_axes_0, beta = d_decoders_2_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_feed_forward_norm_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_2_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_2_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7443712))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8492352))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_2_feed_forward_w_2_weight_to_fp16_quantized, x = input_65_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
	tensor<int32, [1]> inputs_9_axes_0 = const()[name = tensor<string, []>("inputs_9_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_2_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8493440)))];
	tensor<fp16, [512]> d_decoders_2_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8494528)))];
	tensor<fp16, [1, 128, 512]> inputs_9_cast_fp16 = layer_norm(axes = inputs_9_axes_0, beta = d_decoders_2_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_norm2_weight_to_fp16, x = linear_11_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_11_cast_fp16 = mul(x = inputs_9_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
	tensor<int32, [3]> x_29_perm_0 = const()[name = tensor<string, []>("x_29_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_69_mode_0 = const()[name = tensor<string, []>("input_69_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_17_to_fp16 = const()[name = tensor<string, []>("const_17_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_29_cast_fp16 = transpose(perm = x_29_perm_0, x = inputs_11_cast_fp16)[name = tensor<string, []>("transpose_179")];
	tensor<fp16, [1, 512, 138]> input_69_cast_fp16 = pad(constant_val = const_17_to_fp16, mode = input_69_mode_0, pad = input_69_pad_0, x = x_29_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
	tensor<string, []> x_31_pad_type_0 = const()[name = tensor<string, []>("x_31_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_31_groups_0 = const()[name = tensor<string, []>("x_31_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_31_strides_0 = const()[name = tensor<string, []>("x_31_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_31_pad_0 = const()[name = tensor<string, []>("x_31_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_31_dilations_0 = const()[name = tensor<string, []>("x_31_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_2_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_2_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8495616))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8501312))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = d_decoders_2_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
	tensor<int32, [3]> x_33_perm_0 = const()[name = tensor<string, []>("x_33_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_33_cast_fp16 = transpose(perm = x_33_perm_0, x = x_31_cast_fp16)[name = tensor<string, []>("transpose_178")];
	tensor<fp16, [1, 128, 512]> input_71_cast_fp16 = add(x = x_33_cast_fp16, y = inputs_11_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_73_cast_fp16 = mul(x = input_71_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_75_cast_fp16 = add(x = input_55_cast_fp16, y = input_73_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
	tensor<int32, [1]> x_39_axes_0 = const()[name = tensor<string, []>("x_39_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_2_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8502400)))];
	tensor<fp16, [512]> d_decoders_2_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8503488)))];
	tensor<fp16, [1, 128, 512]> x_39_cast_fp16 = layer_norm(axes = x_39_axes_0, beta = d_decoders_2_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_norm3_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_2_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_2_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8504576))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8766784))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_2_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8767872)))];
	tensor<fp16, [1, 128, 512]> linear_12_cast_fp16 = linear(bias = d_decoders_2_src_attn_linear_q_bias_to_fp16, weight = d_decoders_2_src_attn_linear_q_weight_to_fp16_quantized, x = x_39_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
	tensor<int32, [4]> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_319_cast_fp16 = reshape(shape = var_318, x = linear_12_cast_fp16)[name = tensor<string, []>("op_319_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_2_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_2_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8768960))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9293312))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_2_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9295424)))];
	tensor<fp16, [1, 512, 1024]> linear_13_cast_fp16 = linear(bias = d_decoders_2_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_2_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
	tensor<int32, [2]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_324_axis_0 = const()[name = tensor<string, []>("op_324_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_324_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_324_cast_fp16_1 = split(axis = var_324_axis_0, split_sizes = tile_2, x = linear_13_cast_fp16)[name = tensor<string, []>("op_324_cast_fp16")];
	tensor<int32, [4]> var_327 = const()[name = tensor<string, []>("op_327"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_328_cast_fp16 = reshape(shape = var_327, x = var_324_cast_fp16_0)[name = tensor<string, []>("op_328_cast_fp16")];
	tensor<int32, [4]> var_330 = const()[name = tensor<string, []>("op_330"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_331_cast_fp16 = reshape(shape = var_330, x = var_324_cast_fp16_1)[name = tensor<string, []>("op_331_cast_fp16")];
	tensor<int32, [4]> value_5_perm_0 = const()[name = tensor<string, []>("value_5_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_333_to_fp16 = const()[name = tensor<string, []>("op_333_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_11_cast_fp16 = mul(x = var_319_cast_fp16, y = var_333_to_fp16)[name = tensor<string, []>("q_h_11_cast_fp16")];
	tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_68_perm_0 = const()[name = tensor<string, []>("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_69_perm_0 = const()[name = tensor<string, []>("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_69 = transpose(perm = transpose_69_perm_0, x = var_328_cast_fp16)[name = tensor<string, []>("transpose_176")];
	tensor<fp16, [1, 4, 128, 128]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_h_11_cast_fp16)[name = tensor<string, []>("transpose_177")];
	tensor<fp16, [1, 4, 128, 512]> scores_9_cast_fp16 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_68, y = transpose_69)[name = tensor<string, []>("scores_9_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_11_cast_fp16 = select(a = var_8_to_fp16, b = scores_9_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_11_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_341_cast_fp16 = softmax(axis = var_20, x = scores_11_cast_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_77_cast_fp16 = select(a = var_9_to_fp16, b = var_341_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
	tensor<bool, []> x_41_transpose_x_0 = const()[name = tensor<string, []>("x_41_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_41_transpose_y_0 = const()[name = tensor<string, []>("x_41_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_5_cast_fp16 = transpose(perm = value_5_perm_0, x = var_331_cast_fp16)[name = tensor<string, []>("transpose_175")];
	tensor<fp16, [1, 4, 128, 128]> x_41_cast_fp16 = matmul(transpose_x = x_41_transpose_x_0, transpose_y = x_41_transpose_y_0, x = input_77_cast_fp16, y = value_5_cast_fp16)[name = tensor<string, []>("x_41_cast_fp16")];
	tensor<int32, [4]> var_345_perm_0 = const()[name = tensor<string, []>("op_345_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_347 = const()[name = tensor<string, []>("op_347"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_345_cast_fp16 = transpose(perm = var_345_perm_0, x = x_41_cast_fp16)[name = tensor<string, []>("transpose_174")];
	tensor<fp16, [1, 128, 512]> input_79_cast_fp16 = reshape(shape = var_347, x = var_345_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_2_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_2_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9297536))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9559744))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_2_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9560832)))];
	tensor<fp16, [1, 128, 512]> linear_14_cast_fp16 = linear(bias = d_decoders_2_src_attn_linear_out_bias_to_fp16, weight = d_decoders_2_src_attn_linear_out_weight_to_fp16_quantized, x = input_79_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_83_cast_fp16 = add(x = input_75_cast_fp16, y = linear_14_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
	tensor<int32, [1]> input_85_axes_0 = const()[name = tensor<string, []>("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_3_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9561920)))];
	tensor<fp16, [512]> d_decoders_3_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9563008)))];
	tensor<fp16, [1, 128, 512]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = d_decoders_3_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_norm1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_3_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_3_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9564096))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10612736))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_3_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10616896)))];
	tensor<fp16, [1, 128, 2048]> linear_15_cast_fp16 = linear(bias = d_decoders_3_feed_forward_w_1_bias_to_fp16, weight = d_decoders_3_feed_forward_w_1_weight_to_fp16_quantized, x = input_85_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_89_cast_fp16 = relu(x = linear_15_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
	tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_3_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10621056)))];
	tensor<fp16, [2048]> d_decoders_3_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10625216)))];
	tensor<fp16, [1, 128, 2048]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = d_decoders_3_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_feed_forward_norm_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_3_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_3_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10629376))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11678016))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_3_feed_forward_w_2_weight_to_fp16_quantized, x = input_93_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
	tensor<int32, [1]> inputs_13_axes_0 = const()[name = tensor<string, []>("inputs_13_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_3_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11679104)))];
	tensor<fp16, [512]> d_decoders_3_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11680192)))];
	tensor<fp16, [1, 128, 512]> inputs_13_cast_fp16 = layer_norm(axes = inputs_13_axes_0, beta = d_decoders_3_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_norm2_weight_to_fp16, x = linear_16_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_15_cast_fp16 = mul(x = inputs_13_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
	tensor<int32, [3]> x_43_perm_0 = const()[name = tensor<string, []>("x_43_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_97_pad_0 = const()[name = tensor<string, []>("input_97_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_97_mode_0 = const()[name = tensor<string, []>("input_97_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_23_to_fp16 = const()[name = tensor<string, []>("const_23_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_43_cast_fp16 = transpose(perm = x_43_perm_0, x = inputs_15_cast_fp16)[name = tensor<string, []>("transpose_173")];
	tensor<fp16, [1, 512, 138]> input_97_cast_fp16 = pad(constant_val = const_23_to_fp16, mode = input_97_mode_0, pad = input_97_pad_0, x = x_43_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
	tensor<string, []> x_45_pad_type_0 = const()[name = tensor<string, []>("x_45_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_45_groups_0 = const()[name = tensor<string, []>("x_45_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_45_strides_0 = const()[name = tensor<string, []>("x_45_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_45_pad_0 = const()[name = tensor<string, []>("x_45_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_45_dilations_0 = const()[name = tensor<string, []>("x_45_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_3_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_3_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11681280))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11686976))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_45_cast_fp16 = conv(dilations = x_45_dilations_0, groups = x_45_groups_0, pad = x_45_pad_0, pad_type = x_45_pad_type_0, strides = x_45_strides_0, weight = d_decoders_3_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = tensor<string, []>("x_45_cast_fp16")];
	tensor<int32, [3]> x_47_perm_0 = const()[name = tensor<string, []>("x_47_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_47_cast_fp16 = transpose(perm = x_47_perm_0, x = x_45_cast_fp16)[name = tensor<string, []>("transpose_172")];
	tensor<fp16, [1, 128, 512]> input_99_cast_fp16 = add(x = x_47_cast_fp16, y = inputs_15_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_101_cast_fp16 = mul(x = input_99_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_103_cast_fp16 = add(x = input_83_cast_fp16, y = input_101_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
	tensor<int32, [1]> x_53_axes_0 = const()[name = tensor<string, []>("x_53_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_3_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11688064)))];
	tensor<fp16, [512]> d_decoders_3_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11689152)))];
	tensor<fp16, [1, 128, 512]> x_53_cast_fp16 = layer_norm(axes = x_53_axes_0, beta = d_decoders_3_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_norm3_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("x_53_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_3_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_3_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11690240))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11952448))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_3_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11953536)))];
	tensor<fp16, [1, 128, 512]> linear_17_cast_fp16 = linear(bias = d_decoders_3_src_attn_linear_q_bias_to_fp16, weight = d_decoders_3_src_attn_linear_q_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
	tensor<int32, [4]> var_413 = const()[name = tensor<string, []>("op_413"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_414_cast_fp16 = reshape(shape = var_413, x = linear_17_cast_fp16)[name = tensor<string, []>("op_414_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_3_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_3_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11954624))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12478976))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_3_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12481088)))];
	tensor<fp16, [1, 512, 1024]> linear_18_cast_fp16 = linear(bias = d_decoders_3_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_3_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
	tensor<int32, [2]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_419_axis_0 = const()[name = tensor<string, []>("op_419_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_419_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_419_cast_fp16_1 = split(axis = var_419_axis_0, split_sizes = tile_3, x = linear_18_cast_fp16)[name = tensor<string, []>("op_419_cast_fp16")];
	tensor<int32, [4]> var_422 = const()[name = tensor<string, []>("op_422"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_423_cast_fp16 = reshape(shape = var_422, x = var_419_cast_fp16_0)[name = tensor<string, []>("op_423_cast_fp16")];
	tensor<int32, [4]> var_425 = const()[name = tensor<string, []>("op_425"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_426_cast_fp16 = reshape(shape = var_425, x = var_419_cast_fp16_1)[name = tensor<string, []>("op_426_cast_fp16")];
	tensor<int32, [4]> value_7_perm_0 = const()[name = tensor<string, []>("value_7_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_428_to_fp16 = const()[name = tensor<string, []>("op_428_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_15_cast_fp16 = mul(x = var_414_cast_fp16, y = var_428_to_fp16)[name = tensor<string, []>("q_h_15_cast_fp16")];
	tensor<bool, []> scores_13_transpose_x_0 = const()[name = tensor<string, []>("scores_13_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_13_transpose_y_0 = const()[name = tensor<string, []>("scores_13_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_70_perm_0 = const()[name = tensor<string, []>("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_71_perm_0 = const()[name = tensor<string, []>("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_71 = transpose(perm = transpose_71_perm_0, x = var_423_cast_fp16)[name = tensor<string, []>("transpose_170")];
	tensor<fp16, [1, 4, 128, 128]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_h_15_cast_fp16)[name = tensor<string, []>("transpose_171")];
	tensor<fp16, [1, 4, 128, 512]> scores_13_cast_fp16 = matmul(transpose_x = scores_13_transpose_x_0, transpose_y = scores_13_transpose_y_0, x = transpose_70, y = transpose_71)[name = tensor<string, []>("scores_13_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_15_cast_fp16 = select(a = var_8_to_fp16, b = scores_13_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_15_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_436_cast_fp16 = softmax(axis = var_20, x = scores_15_cast_fp16)[name = tensor<string, []>("op_436_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_105_cast_fp16 = select(a = var_9_to_fp16, b = var_436_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
	tensor<bool, []> x_55_transpose_x_0 = const()[name = tensor<string, []>("x_55_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_55_transpose_y_0 = const()[name = tensor<string, []>("x_55_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_7_cast_fp16 = transpose(perm = value_7_perm_0, x = var_426_cast_fp16)[name = tensor<string, []>("transpose_169")];
	tensor<fp16, [1, 4, 128, 128]> x_55_cast_fp16 = matmul(transpose_x = x_55_transpose_x_0, transpose_y = x_55_transpose_y_0, x = input_105_cast_fp16, y = value_7_cast_fp16)[name = tensor<string, []>("x_55_cast_fp16")];
	tensor<int32, [4]> var_440_perm_0 = const()[name = tensor<string, []>("op_440_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_442 = const()[name = tensor<string, []>("op_442"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_440_cast_fp16 = transpose(perm = var_440_perm_0, x = x_55_cast_fp16)[name = tensor<string, []>("transpose_168")];
	tensor<fp16, [1, 128, 512]> input_107_cast_fp16 = reshape(shape = var_442, x = var_440_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_3_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_3_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12483200))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12745408))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_3_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12746496)))];
	tensor<fp16, [1, 128, 512]> linear_19_cast_fp16 = linear(bias = d_decoders_3_src_attn_linear_out_bias_to_fp16, weight = d_decoders_3_src_attn_linear_out_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_111_cast_fp16 = add(x = input_103_cast_fp16, y = linear_19_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
	tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_4_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12747584)))];
	tensor<fp16, [512]> d_decoders_4_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12748672)))];
	tensor<fp16, [1, 128, 512]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = d_decoders_4_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_norm1_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_4_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_4_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12749760))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13798400))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_4_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13802560)))];
	tensor<fp16, [1, 128, 2048]> linear_20_cast_fp16 = linear(bias = d_decoders_4_feed_forward_w_1_bias_to_fp16, weight = d_decoders_4_feed_forward_w_1_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_117_cast_fp16 = relu(x = linear_20_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
	tensor<int32, [1]> input_121_axes_0 = const()[name = tensor<string, []>("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_4_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13806720)))];
	tensor<fp16, [2048]> d_decoders_4_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13810880)))];
	tensor<fp16, [1, 128, 2048]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = d_decoders_4_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_feed_forward_norm_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_4_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_4_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13815040))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14863680))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_4_feed_forward_w_2_weight_to_fp16_quantized, x = input_121_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
	tensor<int32, [1]> inputs_17_axes_0 = const()[name = tensor<string, []>("inputs_17_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_4_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14864768)))];
	tensor<fp16, [512]> d_decoders_4_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14865856)))];
	tensor<fp16, [1, 128, 512]> inputs_17_cast_fp16 = layer_norm(axes = inputs_17_axes_0, beta = d_decoders_4_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_norm2_weight_to_fp16, x = linear_21_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_19_cast_fp16 = mul(x = inputs_17_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
	tensor<int32, [3]> x_57_perm_0 = const()[name = tensor<string, []>("x_57_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_125_pad_0 = const()[name = tensor<string, []>("input_125_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_125_mode_0 = const()[name = tensor<string, []>("input_125_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_29_to_fp16 = const()[name = tensor<string, []>("const_29_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_57_cast_fp16 = transpose(perm = x_57_perm_0, x = inputs_19_cast_fp16)[name = tensor<string, []>("transpose_167")];
	tensor<fp16, [1, 512, 138]> input_125_cast_fp16 = pad(constant_val = const_29_to_fp16, mode = input_125_mode_0, pad = input_125_pad_0, x = x_57_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")];
	tensor<string, []> x_59_pad_type_0 = const()[name = tensor<string, []>("x_59_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_59_groups_0 = const()[name = tensor<string, []>("x_59_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_59_strides_0 = const()[name = tensor<string, []>("x_59_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_59_pad_0 = const()[name = tensor<string, []>("x_59_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_59_dilations_0 = const()[name = tensor<string, []>("x_59_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_4_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_4_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14866944))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14872640))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_59_cast_fp16 = conv(dilations = x_59_dilations_0, groups = x_59_groups_0, pad = x_59_pad_0, pad_type = x_59_pad_type_0, strides = x_59_strides_0, weight = d_decoders_4_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = tensor<string, []>("x_59_cast_fp16")];
	tensor<int32, [3]> x_61_perm_0 = const()[name = tensor<string, []>("x_61_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_61_cast_fp16 = transpose(perm = x_61_perm_0, x = x_59_cast_fp16)[name = tensor<string, []>("transpose_166")];
	tensor<fp16, [1, 128, 512]> input_127_cast_fp16 = add(x = x_61_cast_fp16, y = inputs_19_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_129_cast_fp16 = mul(x = input_127_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_131_cast_fp16 = add(x = input_111_cast_fp16, y = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
	tensor<int32, [1]> x_67_axes_0 = const()[name = tensor<string, []>("x_67_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_4_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14873728)))];
	tensor<fp16, [512]> d_decoders_4_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14874816)))];
	tensor<fp16, [1, 128, 512]> x_67_cast_fp16 = layer_norm(axes = x_67_axes_0, beta = d_decoders_4_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_norm3_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("x_67_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_4_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_4_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14875904))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15138112))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_4_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15139200)))];
	tensor<fp16, [1, 128, 512]> linear_22_cast_fp16 = linear(bias = d_decoders_4_src_attn_linear_q_bias_to_fp16, weight = d_decoders_4_src_attn_linear_q_weight_to_fp16_quantized, x = x_67_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
	tensor<int32, [4]> var_508 = const()[name = tensor<string, []>("op_508"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_509_cast_fp16 = reshape(shape = var_508, x = linear_22_cast_fp16)[name = tensor<string, []>("op_509_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_4_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_4_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15140288))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15664640))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_4_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15666752)))];
	tensor<fp16, [1, 512, 1024]> linear_23_cast_fp16 = linear(bias = d_decoders_4_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_4_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
	tensor<int32, [2]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_514_axis_0 = const()[name = tensor<string, []>("op_514_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_514_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_514_cast_fp16_1 = split(axis = var_514_axis_0, split_sizes = tile_4, x = linear_23_cast_fp16)[name = tensor<string, []>("op_514_cast_fp16")];
	tensor<int32, [4]> var_517 = const()[name = tensor<string, []>("op_517"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_518_cast_fp16 = reshape(shape = var_517, x = var_514_cast_fp16_0)[name = tensor<string, []>("op_518_cast_fp16")];
	tensor<int32, [4]> var_520 = const()[name = tensor<string, []>("op_520"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_521_cast_fp16 = reshape(shape = var_520, x = var_514_cast_fp16_1)[name = tensor<string, []>("op_521_cast_fp16")];
	tensor<int32, [4]> value_9_perm_0 = const()[name = tensor<string, []>("value_9_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_523_to_fp16 = const()[name = tensor<string, []>("op_523_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_19_cast_fp16 = mul(x = var_509_cast_fp16, y = var_523_to_fp16)[name = tensor<string, []>("q_h_19_cast_fp16")];
	tensor<bool, []> scores_17_transpose_x_0 = const()[name = tensor<string, []>("scores_17_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_17_transpose_y_0 = const()[name = tensor<string, []>("scores_17_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_72_perm_0 = const()[name = tensor<string, []>("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_73_perm_0 = const()[name = tensor<string, []>("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_73 = transpose(perm = transpose_73_perm_0, x = var_518_cast_fp16)[name = tensor<string, []>("transpose_164")];
	tensor<fp16, [1, 4, 128, 128]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_h_19_cast_fp16)[name = tensor<string, []>("transpose_165")];
	tensor<fp16, [1, 4, 128, 512]> scores_17_cast_fp16 = matmul(transpose_x = scores_17_transpose_x_0, transpose_y = scores_17_transpose_y_0, x = transpose_72, y = transpose_73)[name = tensor<string, []>("scores_17_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_19_cast_fp16 = select(a = var_8_to_fp16, b = scores_17_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_19_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_531_cast_fp16 = softmax(axis = var_20, x = scores_19_cast_fp16)[name = tensor<string, []>("op_531_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_133_cast_fp16 = select(a = var_9_to_fp16, b = var_531_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
	tensor<bool, []> x_69_transpose_x_0 = const()[name = tensor<string, []>("x_69_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_69_transpose_y_0 = const()[name = tensor<string, []>("x_69_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_9_cast_fp16 = transpose(perm = value_9_perm_0, x = var_521_cast_fp16)[name = tensor<string, []>("transpose_163")];
	tensor<fp16, [1, 4, 128, 128]> x_69_cast_fp16 = matmul(transpose_x = x_69_transpose_x_0, transpose_y = x_69_transpose_y_0, x = input_133_cast_fp16, y = value_9_cast_fp16)[name = tensor<string, []>("x_69_cast_fp16")];
	tensor<int32, [4]> var_535_perm_0 = const()[name = tensor<string, []>("op_535_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_537 = const()[name = tensor<string, []>("op_537"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_535_cast_fp16 = transpose(perm = var_535_perm_0, x = x_69_cast_fp16)[name = tensor<string, []>("transpose_162")];
	tensor<fp16, [1, 128, 512]> input_135_cast_fp16 = reshape(shape = var_537, x = var_535_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_4_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_4_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15668864))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15931072))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_4_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15932160)))];
	tensor<fp16, [1, 128, 512]> linear_24_cast_fp16 = linear(bias = d_decoders_4_src_attn_linear_out_bias_to_fp16, weight = d_decoders_4_src_attn_linear_out_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_139_cast_fp16 = add(x = input_131_cast_fp16, y = linear_24_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
	tensor<int32, [1]> input_141_axes_0 = const()[name = tensor<string, []>("input_141_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_5_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15933248)))];
	tensor<fp16, [512]> d_decoders_5_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15934336)))];
	tensor<fp16, [1, 128, 512]> input_141_cast_fp16 = layer_norm(axes = input_141_axes_0, beta = d_decoders_5_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_norm1_weight_to_fp16, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_5_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_5_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15935424))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16984064))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_5_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16988224)))];
	tensor<fp16, [1, 128, 2048]> linear_25_cast_fp16 = linear(bias = d_decoders_5_feed_forward_w_1_bias_to_fp16, weight = d_decoders_5_feed_forward_w_1_weight_to_fp16_quantized, x = input_141_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_145_cast_fp16 = relu(x = linear_25_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
	tensor<int32, [1]> input_149_axes_0 = const()[name = tensor<string, []>("input_149_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_5_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16992384)))];
	tensor<fp16, [2048]> d_decoders_5_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16996544)))];
	tensor<fp16, [1, 128, 2048]> input_149_cast_fp16 = layer_norm(axes = input_149_axes_0, beta = d_decoders_5_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_feed_forward_norm_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_5_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_5_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17000704))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18049344))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_5_feed_forward_w_2_weight_to_fp16_quantized, x = input_149_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
	tensor<int32, [1]> inputs_21_axes_0 = const()[name = tensor<string, []>("inputs_21_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_5_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18050432)))];
	tensor<fp16, [512]> d_decoders_5_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18051520)))];
	tensor<fp16, [1, 128, 512]> inputs_21_cast_fp16 = layer_norm(axes = inputs_21_axes_0, beta = d_decoders_5_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_norm2_weight_to_fp16, x = linear_26_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_23_cast_fp16 = mul(x = inputs_21_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
	tensor<int32, [3]> x_71_perm_0 = const()[name = tensor<string, []>("x_71_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_153_pad_0 = const()[name = tensor<string, []>("input_153_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_153_mode_0 = const()[name = tensor<string, []>("input_153_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_35_to_fp16 = const()[name = tensor<string, []>("const_35_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_71_cast_fp16 = transpose(perm = x_71_perm_0, x = inputs_23_cast_fp16)[name = tensor<string, []>("transpose_161")];
	tensor<fp16, [1, 512, 138]> input_153_cast_fp16 = pad(constant_val = const_35_to_fp16, mode = input_153_mode_0, pad = input_153_pad_0, x = x_71_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
	tensor<string, []> x_73_pad_type_0 = const()[name = tensor<string, []>("x_73_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_73_groups_0 = const()[name = tensor<string, []>("x_73_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_73_strides_0 = const()[name = tensor<string, []>("x_73_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_73_pad_0 = const()[name = tensor<string, []>("x_73_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_73_dilations_0 = const()[name = tensor<string, []>("x_73_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_5_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_5_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18052608))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18058304))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_73_cast_fp16 = conv(dilations = x_73_dilations_0, groups = x_73_groups_0, pad = x_73_pad_0, pad_type = x_73_pad_type_0, strides = x_73_strides_0, weight = d_decoders_5_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor<string, []>("x_73_cast_fp16")];
	tensor<int32, [3]> x_75_perm_0 = const()[name = tensor<string, []>("x_75_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_75_cast_fp16 = transpose(perm = x_75_perm_0, x = x_73_cast_fp16)[name = tensor<string, []>("transpose_160")];
	tensor<fp16, [1, 128, 512]> input_155_cast_fp16 = add(x = x_75_cast_fp16, y = inputs_23_cast_fp16)[name = tensor<string, []>("input_155_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_157_cast_fp16 = mul(x = input_155_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_159_cast_fp16 = add(x = input_139_cast_fp16, y = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
	tensor<int32, [1]> x_81_axes_0 = const()[name = tensor<string, []>("x_81_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_5_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18059392)))];
	tensor<fp16, [512]> d_decoders_5_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18060480)))];
	tensor<fp16, [1, 128, 512]> x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, beta = d_decoders_5_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_norm3_weight_to_fp16, x = input_159_cast_fp16)[name = tensor<string, []>("x_81_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_5_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_5_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18061568))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18323776))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_5_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18324864)))];
	tensor<fp16, [1, 128, 512]> linear_27_cast_fp16 = linear(bias = d_decoders_5_src_attn_linear_q_bias_to_fp16, weight = d_decoders_5_src_attn_linear_q_weight_to_fp16_quantized, x = x_81_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
	tensor<int32, [4]> var_603 = const()[name = tensor<string, []>("op_603"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_604_cast_fp16 = reshape(shape = var_603, x = linear_27_cast_fp16)[name = tensor<string, []>("op_604_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_5_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_5_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18325952))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18850304))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_5_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18852416)))];
	tensor<fp16, [1, 512, 1024]> linear_28_cast_fp16 = linear(bias = d_decoders_5_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_5_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
	tensor<int32, [2]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_609_axis_0 = const()[name = tensor<string, []>("op_609_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_609_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_609_cast_fp16_1 = split(axis = var_609_axis_0, split_sizes = tile_5, x = linear_28_cast_fp16)[name = tensor<string, []>("op_609_cast_fp16")];
	tensor<int32, [4]> var_612 = const()[name = tensor<string, []>("op_612"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_613_cast_fp16 = reshape(shape = var_612, x = var_609_cast_fp16_0)[name = tensor<string, []>("op_613_cast_fp16")];
	tensor<int32, [4]> var_615 = const()[name = tensor<string, []>("op_615"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_616_cast_fp16 = reshape(shape = var_615, x = var_609_cast_fp16_1)[name = tensor<string, []>("op_616_cast_fp16")];
	tensor<int32, [4]> value_11_perm_0 = const()[name = tensor<string, []>("value_11_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_618_to_fp16 = const()[name = tensor<string, []>("op_618_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_23_cast_fp16 = mul(x = var_604_cast_fp16, y = var_618_to_fp16)[name = tensor<string, []>("q_h_23_cast_fp16")];
	tensor<bool, []> scores_21_transpose_x_0 = const()[name = tensor<string, []>("scores_21_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_21_transpose_y_0 = const()[name = tensor<string, []>("scores_21_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_74_perm_0 = const()[name = tensor<string, []>("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_75_perm_0 = const()[name = tensor<string, []>("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_75 = transpose(perm = transpose_75_perm_0, x = var_613_cast_fp16)[name = tensor<string, []>("transpose_158")];
	tensor<fp16, [1, 4, 128, 128]> transpose_74 = transpose(perm = transpose_74_perm_0, x = q_h_23_cast_fp16)[name = tensor<string, []>("transpose_159")];
	tensor<fp16, [1, 4, 128, 512]> scores_21_cast_fp16 = matmul(transpose_x = scores_21_transpose_x_0, transpose_y = scores_21_transpose_y_0, x = transpose_74, y = transpose_75)[name = tensor<string, []>("scores_21_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_23_cast_fp16 = select(a = var_8_to_fp16, b = scores_21_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_23_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_626_cast_fp16 = softmax(axis = var_20, x = scores_23_cast_fp16)[name = tensor<string, []>("op_626_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_161_cast_fp16 = select(a = var_9_to_fp16, b = var_626_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
	tensor<bool, []> x_83_transpose_x_0 = const()[name = tensor<string, []>("x_83_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_83_transpose_y_0 = const()[name = tensor<string, []>("x_83_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_11_cast_fp16 = transpose(perm = value_11_perm_0, x = var_616_cast_fp16)[name = tensor<string, []>("transpose_157")];
	tensor<fp16, [1, 4, 128, 128]> x_83_cast_fp16 = matmul(transpose_x = x_83_transpose_x_0, transpose_y = x_83_transpose_y_0, x = input_161_cast_fp16, y = value_11_cast_fp16)[name = tensor<string, []>("x_83_cast_fp16")];
	tensor<int32, [4]> var_630_perm_0 = const()[name = tensor<string, []>("op_630_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_632 = const()[name = tensor<string, []>("op_632"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_630_cast_fp16 = transpose(perm = var_630_perm_0, x = x_83_cast_fp16)[name = tensor<string, []>("transpose_156")];
	tensor<fp16, [1, 128, 512]> input_163_cast_fp16 = reshape(shape = var_632, x = var_630_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_5_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_5_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18854528))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19116736))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_5_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19117824)))];
	tensor<fp16, [1, 128, 512]> linear_29_cast_fp16 = linear(bias = d_decoders_5_src_attn_linear_out_bias_to_fp16, weight = d_decoders_5_src_attn_linear_out_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_167_cast_fp16 = add(x = input_159_cast_fp16, y = linear_29_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
	tensor<int32, [1]> input_169_axes_0 = const()[name = tensor<string, []>("input_169_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_6_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19118912)))];
	tensor<fp16, [512]> d_decoders_6_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19120000)))];
	tensor<fp16, [1, 128, 512]> input_169_cast_fp16 = layer_norm(axes = input_169_axes_0, beta = d_decoders_6_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_norm1_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_6_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_6_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(19121088))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20169728))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_6_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20173888)))];
	tensor<fp16, [1, 128, 2048]> linear_30_cast_fp16 = linear(bias = d_decoders_6_feed_forward_w_1_bias_to_fp16, weight = d_decoders_6_feed_forward_w_1_weight_to_fp16_quantized, x = input_169_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_173_cast_fp16 = relu(x = linear_30_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
	tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_6_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20178048)))];
	tensor<fp16, [2048]> d_decoders_6_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20182208)))];
	tensor<fp16, [1, 128, 2048]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = d_decoders_6_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_feed_forward_norm_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_6_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_6_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20186368))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21235008))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_6_feed_forward_w_2_weight_to_fp16_quantized, x = input_177_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
	tensor<int32, [1]> inputs_25_axes_0 = const()[name = tensor<string, []>("inputs_25_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_6_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21236096)))];
	tensor<fp16, [512]> d_decoders_6_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21237184)))];
	tensor<fp16, [1, 128, 512]> inputs_25_cast_fp16 = layer_norm(axes = inputs_25_axes_0, beta = d_decoders_6_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_norm2_weight_to_fp16, x = linear_31_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_27_cast_fp16 = mul(x = inputs_25_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
	tensor<int32, [3]> x_85_perm_0 = const()[name = tensor<string, []>("x_85_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_181_pad_0 = const()[name = tensor<string, []>("input_181_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_41_to_fp16 = const()[name = tensor<string, []>("const_41_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_85_cast_fp16 = transpose(perm = x_85_perm_0, x = inputs_27_cast_fp16)[name = tensor<string, []>("transpose_155")];
	tensor<fp16, [1, 512, 138]> input_181_cast_fp16 = pad(constant_val = const_41_to_fp16, mode = input_181_mode_0, pad = input_181_pad_0, x = x_85_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
	tensor<string, []> x_87_pad_type_0 = const()[name = tensor<string, []>("x_87_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_87_groups_0 = const()[name = tensor<string, []>("x_87_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_87_strides_0 = const()[name = tensor<string, []>("x_87_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_87_pad_0 = const()[name = tensor<string, []>("x_87_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_87_dilations_0 = const()[name = tensor<string, []>("x_87_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_6_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_6_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21238272))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21243968))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_87_cast_fp16 = conv(dilations = x_87_dilations_0, groups = x_87_groups_0, pad = x_87_pad_0, pad_type = x_87_pad_type_0, strides = x_87_strides_0, weight = d_decoders_6_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_181_cast_fp16)[name = tensor<string, []>("x_87_cast_fp16")];
	tensor<int32, [3]> x_89_perm_0 = const()[name = tensor<string, []>("x_89_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_89_cast_fp16 = transpose(perm = x_89_perm_0, x = x_87_cast_fp16)[name = tensor<string, []>("transpose_154")];
	tensor<fp16, [1, 128, 512]> input_183_cast_fp16 = add(x = x_89_cast_fp16, y = inputs_27_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_185_cast_fp16 = mul(x = input_183_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_185_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_187_cast_fp16 = add(x = input_167_cast_fp16, y = input_185_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
	tensor<int32, [1]> x_95_axes_0 = const()[name = tensor<string, []>("x_95_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_6_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21245056)))];
	tensor<fp16, [512]> d_decoders_6_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21246144)))];
	tensor<fp16, [1, 128, 512]> x_95_cast_fp16 = layer_norm(axes = x_95_axes_0, beta = d_decoders_6_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_norm3_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("x_95_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_6_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_6_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21247232))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21509440))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_6_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21510528)))];
	tensor<fp16, [1, 128, 512]> linear_32_cast_fp16 = linear(bias = d_decoders_6_src_attn_linear_q_bias_to_fp16, weight = d_decoders_6_src_attn_linear_q_weight_to_fp16_quantized, x = x_95_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
	tensor<int32, [4]> var_698 = const()[name = tensor<string, []>("op_698"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_699_cast_fp16 = reshape(shape = var_698, x = linear_32_cast_fp16)[name = tensor<string, []>("op_699_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_6_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_6_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21511616))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22035968))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_6_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22038080)))];
	tensor<fp16, [1, 512, 1024]> linear_33_cast_fp16 = linear(bias = d_decoders_6_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_6_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
	tensor<int32, [2]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_704_axis_0 = const()[name = tensor<string, []>("op_704_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_704_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_704_cast_fp16_1 = split(axis = var_704_axis_0, split_sizes = tile_6, x = linear_33_cast_fp16)[name = tensor<string, []>("op_704_cast_fp16")];
	tensor<int32, [4]> var_707 = const()[name = tensor<string, []>("op_707"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_708_cast_fp16 = reshape(shape = var_707, x = var_704_cast_fp16_0)[name = tensor<string, []>("op_708_cast_fp16")];
	tensor<int32, [4]> var_710 = const()[name = tensor<string, []>("op_710"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_711_cast_fp16 = reshape(shape = var_710, x = var_704_cast_fp16_1)[name = tensor<string, []>("op_711_cast_fp16")];
	tensor<int32, [4]> value_13_perm_0 = const()[name = tensor<string, []>("value_13_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_713_to_fp16 = const()[name = tensor<string, []>("op_713_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_27_cast_fp16 = mul(x = var_699_cast_fp16, y = var_713_to_fp16)[name = tensor<string, []>("q_h_27_cast_fp16")];
	tensor<bool, []> scores_25_transpose_x_0 = const()[name = tensor<string, []>("scores_25_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_25_transpose_y_0 = const()[name = tensor<string, []>("scores_25_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_76_perm_0 = const()[name = tensor<string, []>("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_77_perm_0 = const()[name = tensor<string, []>("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_77 = transpose(perm = transpose_77_perm_0, x = var_708_cast_fp16)[name = tensor<string, []>("transpose_152")];
	tensor<fp16, [1, 4, 128, 128]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_h_27_cast_fp16)[name = tensor<string, []>("transpose_153")];
	tensor<fp16, [1, 4, 128, 512]> scores_25_cast_fp16 = matmul(transpose_x = scores_25_transpose_x_0, transpose_y = scores_25_transpose_y_0, x = transpose_76, y = transpose_77)[name = tensor<string, []>("scores_25_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_27_cast_fp16 = select(a = var_8_to_fp16, b = scores_25_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_27_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_721_cast_fp16 = softmax(axis = var_20, x = scores_27_cast_fp16)[name = tensor<string, []>("op_721_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_189_cast_fp16 = select(a = var_9_to_fp16, b = var_721_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
	tensor<bool, []> x_97_transpose_x_0 = const()[name = tensor<string, []>("x_97_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_97_transpose_y_0 = const()[name = tensor<string, []>("x_97_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_13_cast_fp16 = transpose(perm = value_13_perm_0, x = var_711_cast_fp16)[name = tensor<string, []>("transpose_151")];
	tensor<fp16, [1, 4, 128, 128]> x_97_cast_fp16 = matmul(transpose_x = x_97_transpose_x_0, transpose_y = x_97_transpose_y_0, x = input_189_cast_fp16, y = value_13_cast_fp16)[name = tensor<string, []>("x_97_cast_fp16")];
	tensor<int32, [4]> var_725_perm_0 = const()[name = tensor<string, []>("op_725_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_727 = const()[name = tensor<string, []>("op_727"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_725_cast_fp16 = transpose(perm = var_725_perm_0, x = x_97_cast_fp16)[name = tensor<string, []>("transpose_150")];
	tensor<fp16, [1, 128, 512]> input_191_cast_fp16 = reshape(shape = var_727, x = var_725_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_6_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_6_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22040192))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22302400))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_6_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22303488)))];
	tensor<fp16, [1, 128, 512]> linear_34_cast_fp16 = linear(bias = d_decoders_6_src_attn_linear_out_bias_to_fp16, weight = d_decoders_6_src_attn_linear_out_weight_to_fp16_quantized, x = input_191_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_195_cast_fp16 = add(x = input_187_cast_fp16, y = linear_34_cast_fp16)[name = tensor<string, []>("input_195_cast_fp16")];
	tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_7_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22304576)))];
	tensor<fp16, [512]> d_decoders_7_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22305664)))];
	tensor<fp16, [1, 128, 512]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = d_decoders_7_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_norm1_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_7_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_7_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22306752))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23355392))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_7_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23359552)))];
	tensor<fp16, [1, 128, 2048]> linear_35_cast_fp16 = linear(bias = d_decoders_7_feed_forward_w_1_bias_to_fp16, weight = d_decoders_7_feed_forward_w_1_weight_to_fp16_quantized, x = input_197_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_201_cast_fp16 = relu(x = linear_35_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
	tensor<int32, [1]> input_205_axes_0 = const()[name = tensor<string, []>("input_205_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_7_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23363712)))];
	tensor<fp16, [2048]> d_decoders_7_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23367872)))];
	tensor<fp16, [1, 128, 2048]> input_205_cast_fp16 = layer_norm(axes = input_205_axes_0, beta = d_decoders_7_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_feed_forward_norm_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("input_205_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_7_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_7_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23372032))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24420672))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_7_feed_forward_w_2_weight_to_fp16_quantized, x = input_205_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
	tensor<int32, [1]> inputs_29_axes_0 = const()[name = tensor<string, []>("inputs_29_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_7_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24421760)))];
	tensor<fp16, [512]> d_decoders_7_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24422848)))];
	tensor<fp16, [1, 128, 512]> inputs_29_cast_fp16 = layer_norm(axes = inputs_29_axes_0, beta = d_decoders_7_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_norm2_weight_to_fp16, x = linear_36_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_31_cast_fp16 = mul(x = inputs_29_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
	tensor<int32, [3]> x_99_perm_0 = const()[name = tensor<string, []>("x_99_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_209_mode_0 = const()[name = tensor<string, []>("input_209_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_47_to_fp16 = const()[name = tensor<string, []>("const_47_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_99_cast_fp16 = transpose(perm = x_99_perm_0, x = inputs_31_cast_fp16)[name = tensor<string, []>("transpose_149")];
	tensor<fp16, [1, 512, 138]> input_209_cast_fp16 = pad(constant_val = const_47_to_fp16, mode = input_209_mode_0, pad = input_209_pad_0, x = x_99_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
	tensor<string, []> x_101_pad_type_0 = const()[name = tensor<string, []>("x_101_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_101_groups_0 = const()[name = tensor<string, []>("x_101_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_101_strides_0 = const()[name = tensor<string, []>("x_101_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_101_pad_0 = const()[name = tensor<string, []>("x_101_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_101_dilations_0 = const()[name = tensor<string, []>("x_101_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_7_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_7_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24423936))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24429632))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_101_cast_fp16 = conv(dilations = x_101_dilations_0, groups = x_101_groups_0, pad = x_101_pad_0, pad_type = x_101_pad_type_0, strides = x_101_strides_0, weight = d_decoders_7_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_209_cast_fp16)[name = tensor<string, []>("x_101_cast_fp16")];
	tensor<int32, [3]> x_103_perm_0 = const()[name = tensor<string, []>("x_103_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_103_cast_fp16 = transpose(perm = x_103_perm_0, x = x_101_cast_fp16)[name = tensor<string, []>("transpose_148")];
	tensor<fp16, [1, 128, 512]> input_211_cast_fp16 = add(x = x_103_cast_fp16, y = inputs_31_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_213_cast_fp16 = mul(x = input_211_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_215_cast_fp16 = add(x = input_195_cast_fp16, y = input_213_cast_fp16)[name = tensor<string, []>("input_215_cast_fp16")];
	tensor<int32, [1]> x_109_axes_0 = const()[name = tensor<string, []>("x_109_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_7_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24430720)))];
	tensor<fp16, [512]> d_decoders_7_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24431808)))];
	tensor<fp16, [1, 128, 512]> x_109_cast_fp16 = layer_norm(axes = x_109_axes_0, beta = d_decoders_7_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_norm3_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("x_109_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_7_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_7_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24432896))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24695104))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_7_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24696192)))];
	tensor<fp16, [1, 128, 512]> linear_37_cast_fp16 = linear(bias = d_decoders_7_src_attn_linear_q_bias_to_fp16, weight = d_decoders_7_src_attn_linear_q_weight_to_fp16_quantized, x = x_109_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
	tensor<int32, [4]> var_793 = const()[name = tensor<string, []>("op_793"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_794_cast_fp16 = reshape(shape = var_793, x = linear_37_cast_fp16)[name = tensor<string, []>("op_794_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_7_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_7_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24697280))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25221632))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_7_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25223744)))];
	tensor<fp16, [1, 512, 1024]> linear_38_cast_fp16 = linear(bias = d_decoders_7_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_7_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_38_cast_fp16")];
	tensor<int32, [2]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_799_axis_0 = const()[name = tensor<string, []>("op_799_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_799_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_799_cast_fp16_1 = split(axis = var_799_axis_0, split_sizes = tile_7, x = linear_38_cast_fp16)[name = tensor<string, []>("op_799_cast_fp16")];
	tensor<int32, [4]> var_802 = const()[name = tensor<string, []>("op_802"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_803_cast_fp16 = reshape(shape = var_802, x = var_799_cast_fp16_0)[name = tensor<string, []>("op_803_cast_fp16")];
	tensor<int32, [4]> var_805 = const()[name = tensor<string, []>("op_805"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_806_cast_fp16 = reshape(shape = var_805, x = var_799_cast_fp16_1)[name = tensor<string, []>("op_806_cast_fp16")];
	tensor<int32, [4]> value_15_perm_0 = const()[name = tensor<string, []>("value_15_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_808_to_fp16 = const()[name = tensor<string, []>("op_808_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_31_cast_fp16 = mul(x = var_794_cast_fp16, y = var_808_to_fp16)[name = tensor<string, []>("q_h_31_cast_fp16")];
	tensor<bool, []> scores_29_transpose_x_0 = const()[name = tensor<string, []>("scores_29_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_29_transpose_y_0 = const()[name = tensor<string, []>("scores_29_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_78_perm_0 = const()[name = tensor<string, []>("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_79_perm_0 = const()[name = tensor<string, []>("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_79 = transpose(perm = transpose_79_perm_0, x = var_803_cast_fp16)[name = tensor<string, []>("transpose_146")];
	tensor<fp16, [1, 4, 128, 128]> transpose_78 = transpose(perm = transpose_78_perm_0, x = q_h_31_cast_fp16)[name = tensor<string, []>("transpose_147")];
	tensor<fp16, [1, 4, 128, 512]> scores_29_cast_fp16 = matmul(transpose_x = scores_29_transpose_x_0, transpose_y = scores_29_transpose_y_0, x = transpose_78, y = transpose_79)[name = tensor<string, []>("scores_29_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_31_cast_fp16 = select(a = var_8_to_fp16, b = scores_29_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_31_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_816_cast_fp16 = softmax(axis = var_20, x = scores_31_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_217_cast_fp16 = select(a = var_9_to_fp16, b = var_816_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
	tensor<bool, []> x_111_transpose_x_0 = const()[name = tensor<string, []>("x_111_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_111_transpose_y_0 = const()[name = tensor<string, []>("x_111_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_15_cast_fp16 = transpose(perm = value_15_perm_0, x = var_806_cast_fp16)[name = tensor<string, []>("transpose_145")];
	tensor<fp16, [1, 4, 128, 128]> x_111_cast_fp16 = matmul(transpose_x = x_111_transpose_x_0, transpose_y = x_111_transpose_y_0, x = input_217_cast_fp16, y = value_15_cast_fp16)[name = tensor<string, []>("x_111_cast_fp16")];
	tensor<int32, [4]> var_820_perm_0 = const()[name = tensor<string, []>("op_820_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_822 = const()[name = tensor<string, []>("op_822"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_820_cast_fp16 = transpose(perm = var_820_perm_0, x = x_111_cast_fp16)[name = tensor<string, []>("transpose_144")];
	tensor<fp16, [1, 128, 512]> input_219_cast_fp16 = reshape(shape = var_822, x = var_820_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_7_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_7_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25225856))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25488064))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_7_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25489152)))];
	tensor<fp16, [1, 128, 512]> linear_39_cast_fp16 = linear(bias = d_decoders_7_src_attn_linear_out_bias_to_fp16, weight = d_decoders_7_src_attn_linear_out_weight_to_fp16_quantized, x = input_219_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_223_cast_fp16 = add(x = input_215_cast_fp16, y = linear_39_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
	tensor<int32, [1]> input_225_axes_0 = const()[name = tensor<string, []>("input_225_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_8_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25490240)))];
	tensor<fp16, [512]> d_decoders_8_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25491328)))];
	tensor<fp16, [1, 128, 512]> input_225_cast_fp16 = layer_norm(axes = input_225_axes_0, beta = d_decoders_8_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_norm1_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("input_225_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_8_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_8_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25492416))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26541056))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_8_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26545216)))];
	tensor<fp16, [1, 128, 2048]> linear_40_cast_fp16 = linear(bias = d_decoders_8_feed_forward_w_1_bias_to_fp16, weight = d_decoders_8_feed_forward_w_1_weight_to_fp16_quantized, x = input_225_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_229_cast_fp16 = relu(x = linear_40_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
	tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_8_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26549376)))];
	tensor<fp16, [2048]> d_decoders_8_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26553536)))];
	tensor<fp16, [1, 128, 2048]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = d_decoders_8_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_feed_forward_norm_weight_to_fp16, x = input_229_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_8_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_8_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26557696))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27606336))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_8_feed_forward_w_2_weight_to_fp16_quantized, x = input_233_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")];
	tensor<int32, [1]> inputs_33_axes_0 = const()[name = tensor<string, []>("inputs_33_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_8_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27607424)))];
	tensor<fp16, [512]> d_decoders_8_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27608512)))];
	tensor<fp16, [1, 128, 512]> inputs_33_cast_fp16 = layer_norm(axes = inputs_33_axes_0, beta = d_decoders_8_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_norm2_weight_to_fp16, x = linear_41_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_35_cast_fp16 = mul(x = inputs_33_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
	tensor<int32, [3]> x_113_perm_0 = const()[name = tensor<string, []>("x_113_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_237_pad_0 = const()[name = tensor<string, []>("input_237_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_237_mode_0 = const()[name = tensor<string, []>("input_237_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_53_to_fp16 = const()[name = tensor<string, []>("const_53_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_113_cast_fp16 = transpose(perm = x_113_perm_0, x = inputs_35_cast_fp16)[name = tensor<string, []>("transpose_143")];
	tensor<fp16, [1, 512, 138]> input_237_cast_fp16 = pad(constant_val = const_53_to_fp16, mode = input_237_mode_0, pad = input_237_pad_0, x = x_113_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
	tensor<string, []> x_115_pad_type_0 = const()[name = tensor<string, []>("x_115_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_115_groups_0 = const()[name = tensor<string, []>("x_115_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_115_strides_0 = const()[name = tensor<string, []>("x_115_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_115_pad_0 = const()[name = tensor<string, []>("x_115_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_115_dilations_0 = const()[name = tensor<string, []>("x_115_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_8_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_8_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27609600))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27615296))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_115_cast_fp16 = conv(dilations = x_115_dilations_0, groups = x_115_groups_0, pad = x_115_pad_0, pad_type = x_115_pad_type_0, strides = x_115_strides_0, weight = d_decoders_8_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_237_cast_fp16)[name = tensor<string, []>("x_115_cast_fp16")];
	tensor<int32, [3]> x_117_perm_0 = const()[name = tensor<string, []>("x_117_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_117_cast_fp16 = transpose(perm = x_117_perm_0, x = x_115_cast_fp16)[name = tensor<string, []>("transpose_142")];
	tensor<fp16, [1, 128, 512]> input_239_cast_fp16 = add(x = x_117_cast_fp16, y = inputs_35_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_241_cast_fp16 = mul(x = input_239_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_243_cast_fp16 = add(x = input_223_cast_fp16, y = input_241_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
	tensor<int32, [1]> x_123_axes_0 = const()[name = tensor<string, []>("x_123_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_8_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27616384)))];
	tensor<fp16, [512]> d_decoders_8_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27617472)))];
	tensor<fp16, [1, 128, 512]> x_123_cast_fp16 = layer_norm(axes = x_123_axes_0, beta = d_decoders_8_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_norm3_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("x_123_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_8_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_8_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27618560))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27880768))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_8_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27881856)))];
	tensor<fp16, [1, 128, 512]> linear_42_cast_fp16 = linear(bias = d_decoders_8_src_attn_linear_q_bias_to_fp16, weight = d_decoders_8_src_attn_linear_q_weight_to_fp16_quantized, x = x_123_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")];
	tensor<int32, [4]> var_888 = const()[name = tensor<string, []>("op_888"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_889_cast_fp16 = reshape(shape = var_888, x = linear_42_cast_fp16)[name = tensor<string, []>("op_889_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_8_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_8_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27882944))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28407296))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_8_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28409408)))];
	tensor<fp16, [1, 512, 1024]> linear_43_cast_fp16 = linear(bias = d_decoders_8_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_8_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_43_cast_fp16")];
	tensor<int32, [2]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_894_axis_0 = const()[name = tensor<string, []>("op_894_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_894_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_894_cast_fp16_1 = split(axis = var_894_axis_0, split_sizes = tile_8, x = linear_43_cast_fp16)[name = tensor<string, []>("op_894_cast_fp16")];
	tensor<int32, [4]> var_897 = const()[name = tensor<string, []>("op_897"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_898_cast_fp16 = reshape(shape = var_897, x = var_894_cast_fp16_0)[name = tensor<string, []>("op_898_cast_fp16")];
	tensor<int32, [4]> var_900 = const()[name = tensor<string, []>("op_900"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_901_cast_fp16 = reshape(shape = var_900, x = var_894_cast_fp16_1)[name = tensor<string, []>("op_901_cast_fp16")];
	tensor<int32, [4]> value_17_perm_0 = const()[name = tensor<string, []>("value_17_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_903_to_fp16 = const()[name = tensor<string, []>("op_903_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_35_cast_fp16 = mul(x = var_889_cast_fp16, y = var_903_to_fp16)[name = tensor<string, []>("q_h_35_cast_fp16")];
	tensor<bool, []> scores_33_transpose_x_0 = const()[name = tensor<string, []>("scores_33_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_33_transpose_y_0 = const()[name = tensor<string, []>("scores_33_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_80_perm_0 = const()[name = tensor<string, []>("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_81_perm_0 = const()[name = tensor<string, []>("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_81 = transpose(perm = transpose_81_perm_0, x = var_898_cast_fp16)[name = tensor<string, []>("transpose_140")];
	tensor<fp16, [1, 4, 128, 128]> transpose_80 = transpose(perm = transpose_80_perm_0, x = q_h_35_cast_fp16)[name = tensor<string, []>("transpose_141")];
	tensor<fp16, [1, 4, 128, 512]> scores_33_cast_fp16 = matmul(transpose_x = scores_33_transpose_x_0, transpose_y = scores_33_transpose_y_0, x = transpose_80, y = transpose_81)[name = tensor<string, []>("scores_33_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_35_cast_fp16 = select(a = var_8_to_fp16, b = scores_33_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_35_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_911_cast_fp16 = softmax(axis = var_20, x = scores_35_cast_fp16)[name = tensor<string, []>("op_911_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_245_cast_fp16 = select(a = var_9_to_fp16, b = var_911_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_245_cast_fp16")];
	tensor<bool, []> x_125_transpose_x_0 = const()[name = tensor<string, []>("x_125_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_125_transpose_y_0 = const()[name = tensor<string, []>("x_125_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_17_cast_fp16 = transpose(perm = value_17_perm_0, x = var_901_cast_fp16)[name = tensor<string, []>("transpose_139")];
	tensor<fp16, [1, 4, 128, 128]> x_125_cast_fp16 = matmul(transpose_x = x_125_transpose_x_0, transpose_y = x_125_transpose_y_0, x = input_245_cast_fp16, y = value_17_cast_fp16)[name = tensor<string, []>("x_125_cast_fp16")];
	tensor<int32, [4]> var_915_perm_0 = const()[name = tensor<string, []>("op_915_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_917 = const()[name = tensor<string, []>("op_917"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_915_cast_fp16 = transpose(perm = var_915_perm_0, x = x_125_cast_fp16)[name = tensor<string, []>("transpose_138")];
	tensor<fp16, [1, 128, 512]> input_247_cast_fp16 = reshape(shape = var_917, x = var_915_cast_fp16)[name = tensor<string, []>("input_247_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_8_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_8_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28411520))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28673728))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_8_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28674816)))];
	tensor<fp16, [1, 128, 512]> linear_44_cast_fp16 = linear(bias = d_decoders_8_src_attn_linear_out_bias_to_fp16, weight = d_decoders_8_src_attn_linear_out_weight_to_fp16_quantized, x = input_247_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_251_cast_fp16 = add(x = input_243_cast_fp16, y = linear_44_cast_fp16)[name = tensor<string, []>("input_251_cast_fp16")];
	tensor<int32, [1]> input_253_axes_0 = const()[name = tensor<string, []>("input_253_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_9_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28675904)))];
	tensor<fp16, [512]> d_decoders_9_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28676992)))];
	tensor<fp16, [1, 128, 512]> input_253_cast_fp16 = layer_norm(axes = input_253_axes_0, beta = d_decoders_9_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_norm1_weight_to_fp16, x = input_251_cast_fp16)[name = tensor<string, []>("input_253_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_9_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_9_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28678080))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29726720))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_9_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29730880)))];
	tensor<fp16, [1, 128, 2048]> linear_45_cast_fp16 = linear(bias = d_decoders_9_feed_forward_w_1_bias_to_fp16, weight = d_decoders_9_feed_forward_w_1_weight_to_fp16_quantized, x = input_253_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_257_cast_fp16 = relu(x = linear_45_cast_fp16)[name = tensor<string, []>("input_257_cast_fp16")];
	tensor<int32, [1]> input_261_axes_0 = const()[name = tensor<string, []>("input_261_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_9_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29735040)))];
	tensor<fp16, [2048]> d_decoders_9_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29739200)))];
	tensor<fp16, [1, 128, 2048]> input_261_cast_fp16 = layer_norm(axes = input_261_axes_0, beta = d_decoders_9_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_feed_forward_norm_weight_to_fp16, x = input_257_cast_fp16)[name = tensor<string, []>("input_261_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_9_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_9_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29743360))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30792000))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_9_feed_forward_w_2_weight_to_fp16_quantized, x = input_261_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")];
	tensor<int32, [1]> inputs_37_axes_0 = const()[name = tensor<string, []>("inputs_37_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_9_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30793088)))];
	tensor<fp16, [512]> d_decoders_9_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30794176)))];
	tensor<fp16, [1, 128, 512]> inputs_37_cast_fp16 = layer_norm(axes = inputs_37_axes_0, beta = d_decoders_9_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_norm2_weight_to_fp16, x = linear_46_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_39_cast_fp16 = mul(x = inputs_37_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
	tensor<int32, [3]> x_127_perm_0 = const()[name = tensor<string, []>("x_127_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_265_pad_0 = const()[name = tensor<string, []>("input_265_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_265_mode_0 = const()[name = tensor<string, []>("input_265_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_59_to_fp16 = const()[name = tensor<string, []>("const_59_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_127_cast_fp16 = transpose(perm = x_127_perm_0, x = inputs_39_cast_fp16)[name = tensor<string, []>("transpose_137")];
	tensor<fp16, [1, 512, 138]> input_265_cast_fp16 = pad(constant_val = const_59_to_fp16, mode = input_265_mode_0, pad = input_265_pad_0, x = x_127_cast_fp16)[name = tensor<string, []>("input_265_cast_fp16")];
	tensor<string, []> x_129_pad_type_0 = const()[name = tensor<string, []>("x_129_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_129_groups_0 = const()[name = tensor<string, []>("x_129_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_129_strides_0 = const()[name = tensor<string, []>("x_129_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_129_pad_0 = const()[name = tensor<string, []>("x_129_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_129_dilations_0 = const()[name = tensor<string, []>("x_129_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_9_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_9_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30795264))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30800960))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_129_cast_fp16 = conv(dilations = x_129_dilations_0, groups = x_129_groups_0, pad = x_129_pad_0, pad_type = x_129_pad_type_0, strides = x_129_strides_0, weight = d_decoders_9_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_265_cast_fp16)[name = tensor<string, []>("x_129_cast_fp16")];
	tensor<int32, [3]> x_131_perm_0 = const()[name = tensor<string, []>("x_131_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_131_cast_fp16 = transpose(perm = x_131_perm_0, x = x_129_cast_fp16)[name = tensor<string, []>("transpose_136")];
	tensor<fp16, [1, 128, 512]> input_267_cast_fp16 = add(x = x_131_cast_fp16, y = inputs_39_cast_fp16)[name = tensor<string, []>("input_267_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_269_cast_fp16 = mul(x = input_267_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_269_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_271_cast_fp16 = add(x = input_251_cast_fp16, y = input_269_cast_fp16)[name = tensor<string, []>("input_271_cast_fp16")];
	tensor<int32, [1]> x_137_axes_0 = const()[name = tensor<string, []>("x_137_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_9_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30802048)))];
	tensor<fp16, [512]> d_decoders_9_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30803136)))];
	tensor<fp16, [1, 128, 512]> x_137_cast_fp16 = layer_norm(axes = x_137_axes_0, beta = d_decoders_9_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_norm3_weight_to_fp16, x = input_271_cast_fp16)[name = tensor<string, []>("x_137_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_9_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_9_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30804224))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31066432))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_9_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31067520)))];
	tensor<fp16, [1, 128, 512]> linear_47_cast_fp16 = linear(bias = d_decoders_9_src_attn_linear_q_bias_to_fp16, weight = d_decoders_9_src_attn_linear_q_weight_to_fp16_quantized, x = x_137_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")];
	tensor<int32, [4]> var_983 = const()[name = tensor<string, []>("op_983"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_984_cast_fp16 = reshape(shape = var_983, x = linear_47_cast_fp16)[name = tensor<string, []>("op_984_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_9_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_9_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31068608))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31592960))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_9_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31595072)))];
	tensor<fp16, [1, 512, 1024]> linear_48_cast_fp16 = linear(bias = d_decoders_9_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_9_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_48_cast_fp16")];
	tensor<int32, [2]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_989_axis_0 = const()[name = tensor<string, []>("op_989_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_989_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_989_cast_fp16_1 = split(axis = var_989_axis_0, split_sizes = tile_9, x = linear_48_cast_fp16)[name = tensor<string, []>("op_989_cast_fp16")];
	tensor<int32, [4]> var_992 = const()[name = tensor<string, []>("op_992"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_993_cast_fp16 = reshape(shape = var_992, x = var_989_cast_fp16_0)[name = tensor<string, []>("op_993_cast_fp16")];
	tensor<int32, [4]> var_995 = const()[name = tensor<string, []>("op_995"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_996_cast_fp16 = reshape(shape = var_995, x = var_989_cast_fp16_1)[name = tensor<string, []>("op_996_cast_fp16")];
	tensor<int32, [4]> value_19_perm_0 = const()[name = tensor<string, []>("value_19_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_998_to_fp16 = const()[name = tensor<string, []>("op_998_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_39_cast_fp16 = mul(x = var_984_cast_fp16, y = var_998_to_fp16)[name = tensor<string, []>("q_h_39_cast_fp16")];
	tensor<bool, []> scores_37_transpose_x_0 = const()[name = tensor<string, []>("scores_37_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_37_transpose_y_0 = const()[name = tensor<string, []>("scores_37_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_82_perm_0 = const()[name = tensor<string, []>("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_83_perm_0 = const()[name = tensor<string, []>("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_83 = transpose(perm = transpose_83_perm_0, x = var_993_cast_fp16)[name = tensor<string, []>("transpose_134")];
	tensor<fp16, [1, 4, 128, 128]> transpose_82 = transpose(perm = transpose_82_perm_0, x = q_h_39_cast_fp16)[name = tensor<string, []>("transpose_135")];
	tensor<fp16, [1, 4, 128, 512]> scores_37_cast_fp16 = matmul(transpose_x = scores_37_transpose_x_0, transpose_y = scores_37_transpose_y_0, x = transpose_82, y = transpose_83)[name = tensor<string, []>("scores_37_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_39_cast_fp16 = select(a = var_8_to_fp16, b = scores_37_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_39_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1006_cast_fp16 = softmax(axis = var_20, x = scores_39_cast_fp16)[name = tensor<string, []>("op_1006_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_273_cast_fp16 = select(a = var_9_to_fp16, b = var_1006_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_273_cast_fp16")];
	tensor<bool, []> x_139_transpose_x_0 = const()[name = tensor<string, []>("x_139_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_139_transpose_y_0 = const()[name = tensor<string, []>("x_139_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_19_cast_fp16 = transpose(perm = value_19_perm_0, x = var_996_cast_fp16)[name = tensor<string, []>("transpose_133")];
	tensor<fp16, [1, 4, 128, 128]> x_139_cast_fp16 = matmul(transpose_x = x_139_transpose_x_0, transpose_y = x_139_transpose_y_0, x = input_273_cast_fp16, y = value_19_cast_fp16)[name = tensor<string, []>("x_139_cast_fp16")];
	tensor<int32, [4]> var_1010_perm_0 = const()[name = tensor<string, []>("op_1010_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1012 = const()[name = tensor<string, []>("op_1012"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1010_cast_fp16 = transpose(perm = var_1010_perm_0, x = x_139_cast_fp16)[name = tensor<string, []>("transpose_132")];
	tensor<fp16, [1, 128, 512]> input_275_cast_fp16 = reshape(shape = var_1012, x = var_1010_cast_fp16)[name = tensor<string, []>("input_275_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_9_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_9_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31597184))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31859392))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_9_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31860480)))];
	tensor<fp16, [1, 128, 512]> linear_49_cast_fp16 = linear(bias = d_decoders_9_src_attn_linear_out_bias_to_fp16, weight = d_decoders_9_src_attn_linear_out_weight_to_fp16_quantized, x = input_275_cast_fp16)[name = tensor<string, []>("linear_49_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_279_cast_fp16 = add(x = input_271_cast_fp16, y = linear_49_cast_fp16)[name = tensor<string, []>("input_279_cast_fp16")];
	tensor<int32, [1]> input_281_axes_0 = const()[name = tensor<string, []>("input_281_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_10_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31861568)))];
	tensor<fp16, [512]> d_decoders_10_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31862656)))];
	tensor<fp16, [1, 128, 512]> input_281_cast_fp16 = layer_norm(axes = input_281_axes_0, beta = d_decoders_10_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_norm1_weight_to_fp16, x = input_279_cast_fp16)[name = tensor<string, []>("input_281_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_10_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_10_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31863744))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32912384))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_10_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32916544)))];
	tensor<fp16, [1, 128, 2048]> linear_50_cast_fp16 = linear(bias = d_decoders_10_feed_forward_w_1_bias_to_fp16, weight = d_decoders_10_feed_forward_w_1_weight_to_fp16_quantized, x = input_281_cast_fp16)[name = tensor<string, []>("linear_50_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_285_cast_fp16 = relu(x = linear_50_cast_fp16)[name = tensor<string, []>("input_285_cast_fp16")];
	tensor<int32, [1]> input_289_axes_0 = const()[name = tensor<string, []>("input_289_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_10_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32920704)))];
	tensor<fp16, [2048]> d_decoders_10_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32924864)))];
	tensor<fp16, [1, 128, 2048]> input_289_cast_fp16 = layer_norm(axes = input_289_axes_0, beta = d_decoders_10_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_feed_forward_norm_weight_to_fp16, x = input_285_cast_fp16)[name = tensor<string, []>("input_289_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_10_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_10_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32929024))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33977664))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_10_feed_forward_w_2_weight_to_fp16_quantized, x = input_289_cast_fp16)[name = tensor<string, []>("linear_51_cast_fp16")];
	tensor<int32, [1]> inputs_41_axes_0 = const()[name = tensor<string, []>("inputs_41_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_10_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33978752)))];
	tensor<fp16, [512]> d_decoders_10_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33979840)))];
	tensor<fp16, [1, 128, 512]> inputs_41_cast_fp16 = layer_norm(axes = inputs_41_axes_0, beta = d_decoders_10_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_norm2_weight_to_fp16, x = linear_51_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_43_cast_fp16 = mul(x = inputs_41_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
	tensor<int32, [3]> x_141_perm_0 = const()[name = tensor<string, []>("x_141_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_293_pad_0 = const()[name = tensor<string, []>("input_293_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_293_mode_0 = const()[name = tensor<string, []>("input_293_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_65_to_fp16 = const()[name = tensor<string, []>("const_65_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_141_cast_fp16 = transpose(perm = x_141_perm_0, x = inputs_43_cast_fp16)[name = tensor<string, []>("transpose_131")];
	tensor<fp16, [1, 512, 138]> input_293_cast_fp16 = pad(constant_val = const_65_to_fp16, mode = input_293_mode_0, pad = input_293_pad_0, x = x_141_cast_fp16)[name = tensor<string, []>("input_293_cast_fp16")];
	tensor<string, []> x_143_pad_type_0 = const()[name = tensor<string, []>("x_143_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_143_groups_0 = const()[name = tensor<string, []>("x_143_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_143_strides_0 = const()[name = tensor<string, []>("x_143_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_143_pad_0 = const()[name = tensor<string, []>("x_143_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_143_dilations_0 = const()[name = tensor<string, []>("x_143_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_10_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_10_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33980928))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33986624))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = d_decoders_10_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_293_cast_fp16)[name = tensor<string, []>("x_143_cast_fp16")];
	tensor<int32, [3]> x_145_perm_0 = const()[name = tensor<string, []>("x_145_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_145_cast_fp16 = transpose(perm = x_145_perm_0, x = x_143_cast_fp16)[name = tensor<string, []>("transpose_130")];
	tensor<fp16, [1, 128, 512]> input_295_cast_fp16 = add(x = x_145_cast_fp16, y = inputs_43_cast_fp16)[name = tensor<string, []>("input_295_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_297_cast_fp16 = mul(x = input_295_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_297_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_299_cast_fp16 = add(x = input_279_cast_fp16, y = input_297_cast_fp16)[name = tensor<string, []>("input_299_cast_fp16")];
	tensor<int32, [1]> x_151_axes_0 = const()[name = tensor<string, []>("x_151_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_10_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33987712)))];
	tensor<fp16, [512]> d_decoders_10_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33988800)))];
	tensor<fp16, [1, 128, 512]> x_151_cast_fp16 = layer_norm(axes = x_151_axes_0, beta = d_decoders_10_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_norm3_weight_to_fp16, x = input_299_cast_fp16)[name = tensor<string, []>("x_151_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_10_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_10_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33989888))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34252096))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_10_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34253184)))];
	tensor<fp16, [1, 128, 512]> linear_52_cast_fp16 = linear(bias = d_decoders_10_src_attn_linear_q_bias_to_fp16, weight = d_decoders_10_src_attn_linear_q_weight_to_fp16_quantized, x = x_151_cast_fp16)[name = tensor<string, []>("linear_52_cast_fp16")];
	tensor<int32, [4]> var_1078 = const()[name = tensor<string, []>("op_1078"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_1079_cast_fp16 = reshape(shape = var_1078, x = linear_52_cast_fp16)[name = tensor<string, []>("op_1079_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_10_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_10_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34254272))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34778624))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_10_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34780736)))];
	tensor<fp16, [1, 512, 1024]> linear_53_cast_fp16 = linear(bias = d_decoders_10_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_10_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_53_cast_fp16")];
	tensor<int32, [2]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_1084_axis_0 = const()[name = tensor<string, []>("op_1084_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_1084_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1084_cast_fp16_1 = split(axis = var_1084_axis_0, split_sizes = tile_10, x = linear_53_cast_fp16)[name = tensor<string, []>("op_1084_cast_fp16")];
	tensor<int32, [4]> var_1087 = const()[name = tensor<string, []>("op_1087"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1088_cast_fp16 = reshape(shape = var_1087, x = var_1084_cast_fp16_0)[name = tensor<string, []>("op_1088_cast_fp16")];
	tensor<int32, [4]> var_1090 = const()[name = tensor<string, []>("op_1090"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1091_cast_fp16 = reshape(shape = var_1090, x = var_1084_cast_fp16_1)[name = tensor<string, []>("op_1091_cast_fp16")];
	tensor<int32, [4]> value_21_perm_0 = const()[name = tensor<string, []>("value_21_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_1093_to_fp16 = const()[name = tensor<string, []>("op_1093_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_43_cast_fp16 = mul(x = var_1079_cast_fp16, y = var_1093_to_fp16)[name = tensor<string, []>("q_h_43_cast_fp16")];
	tensor<bool, []> scores_41_transpose_x_0 = const()[name = tensor<string, []>("scores_41_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_41_transpose_y_0 = const()[name = tensor<string, []>("scores_41_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_84_perm_0 = const()[name = tensor<string, []>("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_85_perm_0 = const()[name = tensor<string, []>("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_85 = transpose(perm = transpose_85_perm_0, x = var_1088_cast_fp16)[name = tensor<string, []>("transpose_128")];
	tensor<fp16, [1, 4, 128, 128]> transpose_84 = transpose(perm = transpose_84_perm_0, x = q_h_43_cast_fp16)[name = tensor<string, []>("transpose_129")];
	tensor<fp16, [1, 4, 128, 512]> scores_41_cast_fp16 = matmul(transpose_x = scores_41_transpose_x_0, transpose_y = scores_41_transpose_y_0, x = transpose_84, y = transpose_85)[name = tensor<string, []>("scores_41_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_43_cast_fp16 = select(a = var_8_to_fp16, b = scores_41_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_43_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1101_cast_fp16 = softmax(axis = var_20, x = scores_43_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_301_cast_fp16 = select(a = var_9_to_fp16, b = var_1101_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_301_cast_fp16")];
	tensor<bool, []> x_153_transpose_x_0 = const()[name = tensor<string, []>("x_153_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_153_transpose_y_0 = const()[name = tensor<string, []>("x_153_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_21_cast_fp16 = transpose(perm = value_21_perm_0, x = var_1091_cast_fp16)[name = tensor<string, []>("transpose_127")];
	tensor<fp16, [1, 4, 128, 128]> x_153_cast_fp16 = matmul(transpose_x = x_153_transpose_x_0, transpose_y = x_153_transpose_y_0, x = input_301_cast_fp16, y = value_21_cast_fp16)[name = tensor<string, []>("x_153_cast_fp16")];
	tensor<int32, [4]> var_1105_perm_0 = const()[name = tensor<string, []>("op_1105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1107 = const()[name = tensor<string, []>("op_1107"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1105_cast_fp16 = transpose(perm = var_1105_perm_0, x = x_153_cast_fp16)[name = tensor<string, []>("transpose_126")];
	tensor<fp16, [1, 128, 512]> input_303_cast_fp16 = reshape(shape = var_1107, x = var_1105_cast_fp16)[name = tensor<string, []>("input_303_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_10_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_10_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34782848))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35045056))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_10_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35046144)))];
	tensor<fp16, [1, 128, 512]> linear_54_cast_fp16 = linear(bias = d_decoders_10_src_attn_linear_out_bias_to_fp16, weight = d_decoders_10_src_attn_linear_out_weight_to_fp16_quantized, x = input_303_cast_fp16)[name = tensor<string, []>("linear_54_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_307_cast_fp16 = add(x = input_299_cast_fp16, y = linear_54_cast_fp16)[name = tensor<string, []>("input_307_cast_fp16")];
	tensor<int32, [1]> input_309_axes_0 = const()[name = tensor<string, []>("input_309_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_11_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35047232)))];
	tensor<fp16, [512]> d_decoders_11_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35048320)))];
	tensor<fp16, [1, 128, 512]> input_309_cast_fp16 = layer_norm(axes = input_309_axes_0, beta = d_decoders_11_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_norm1_weight_to_fp16, x = input_307_cast_fp16)[name = tensor<string, []>("input_309_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_11_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_11_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35049408))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36098048))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_11_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36102208)))];
	tensor<fp16, [1, 128, 2048]> linear_55_cast_fp16 = linear(bias = d_decoders_11_feed_forward_w_1_bias_to_fp16, weight = d_decoders_11_feed_forward_w_1_weight_to_fp16_quantized, x = input_309_cast_fp16)[name = tensor<string, []>("linear_55_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_313_cast_fp16 = relu(x = linear_55_cast_fp16)[name = tensor<string, []>("input_313_cast_fp16")];
	tensor<int32, [1]> input_317_axes_0 = const()[name = tensor<string, []>("input_317_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_11_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36106368)))];
	tensor<fp16, [2048]> d_decoders_11_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36110528)))];
	tensor<fp16, [1, 128, 2048]> input_317_cast_fp16 = layer_norm(axes = input_317_axes_0, beta = d_decoders_11_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_feed_forward_norm_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("input_317_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_11_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_11_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36114688))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37163328))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_11_feed_forward_w_2_weight_to_fp16_quantized, x = input_317_cast_fp16)[name = tensor<string, []>("linear_56_cast_fp16")];
	tensor<int32, [1]> inputs_45_axes_0 = const()[name = tensor<string, []>("inputs_45_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_11_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37164416)))];
	tensor<fp16, [512]> d_decoders_11_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37165504)))];
	tensor<fp16, [1, 128, 512]> inputs_45_cast_fp16 = layer_norm(axes = inputs_45_axes_0, beta = d_decoders_11_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_norm2_weight_to_fp16, x = linear_56_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_47_cast_fp16 = mul(x = inputs_45_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
	tensor<int32, [3]> x_155_perm_0 = const()[name = tensor<string, []>("x_155_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_321_pad_0 = const()[name = tensor<string, []>("input_321_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_321_mode_0 = const()[name = tensor<string, []>("input_321_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_71_to_fp16 = const()[name = tensor<string, []>("const_71_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_155_cast_fp16 = transpose(perm = x_155_perm_0, x = inputs_47_cast_fp16)[name = tensor<string, []>("transpose_125")];
	tensor<fp16, [1, 512, 138]> input_321_cast_fp16 = pad(constant_val = const_71_to_fp16, mode = input_321_mode_0, pad = input_321_pad_0, x = x_155_cast_fp16)[name = tensor<string, []>("input_321_cast_fp16")];
	tensor<string, []> x_157_pad_type_0 = const()[name = tensor<string, []>("x_157_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_157_groups_0 = const()[name = tensor<string, []>("x_157_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_157_strides_0 = const()[name = tensor<string, []>("x_157_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_157_pad_0 = const()[name = tensor<string, []>("x_157_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_157_dilations_0 = const()[name = tensor<string, []>("x_157_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_11_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_11_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37166592))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37172288))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_157_cast_fp16 = conv(dilations = x_157_dilations_0, groups = x_157_groups_0, pad = x_157_pad_0, pad_type = x_157_pad_type_0, strides = x_157_strides_0, weight = d_decoders_11_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_321_cast_fp16)[name = tensor<string, []>("x_157_cast_fp16")];
	tensor<int32, [3]> x_159_perm_0 = const()[name = tensor<string, []>("x_159_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_159_cast_fp16 = transpose(perm = x_159_perm_0, x = x_157_cast_fp16)[name = tensor<string, []>("transpose_124")];
	tensor<fp16, [1, 128, 512]> input_323_cast_fp16 = add(x = x_159_cast_fp16, y = inputs_47_cast_fp16)[name = tensor<string, []>("input_323_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_325_cast_fp16 = mul(x = input_323_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_325_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_327_cast_fp16 = add(x = input_307_cast_fp16, y = input_325_cast_fp16)[name = tensor<string, []>("input_327_cast_fp16")];
	tensor<int32, [1]> x_165_axes_0 = const()[name = tensor<string, []>("x_165_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_11_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37173376)))];
	tensor<fp16, [512]> d_decoders_11_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37174464)))];
	tensor<fp16, [1, 128, 512]> x_165_cast_fp16 = layer_norm(axes = x_165_axes_0, beta = d_decoders_11_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_norm3_weight_to_fp16, x = input_327_cast_fp16)[name = tensor<string, []>("x_165_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_11_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_11_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37175552))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37437760))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_11_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37438848)))];
	tensor<fp16, [1, 128, 512]> linear_57_cast_fp16 = linear(bias = d_decoders_11_src_attn_linear_q_bias_to_fp16, weight = d_decoders_11_src_attn_linear_q_weight_to_fp16_quantized, x = x_165_cast_fp16)[name = tensor<string, []>("linear_57_cast_fp16")];
	tensor<int32, [4]> var_1173 = const()[name = tensor<string, []>("op_1173"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_1174_cast_fp16 = reshape(shape = var_1173, x = linear_57_cast_fp16)[name = tensor<string, []>("op_1174_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_11_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_11_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37439936))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37964288))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_11_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37966400)))];
	tensor<fp16, [1, 512, 1024]> linear_58_cast_fp16 = linear(bias = d_decoders_11_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_11_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_58_cast_fp16")];
	tensor<int32, [2]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_1179_axis_0 = const()[name = tensor<string, []>("op_1179_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_1179_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1179_cast_fp16_1 = split(axis = var_1179_axis_0, split_sizes = tile_11, x = linear_58_cast_fp16)[name = tensor<string, []>("op_1179_cast_fp16")];
	tensor<int32, [4]> var_1182 = const()[name = tensor<string, []>("op_1182"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1183_cast_fp16 = reshape(shape = var_1182, x = var_1179_cast_fp16_0)[name = tensor<string, []>("op_1183_cast_fp16")];
	tensor<int32, [4]> var_1185 = const()[name = tensor<string, []>("op_1185"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1186_cast_fp16 = reshape(shape = var_1185, x = var_1179_cast_fp16_1)[name = tensor<string, []>("op_1186_cast_fp16")];
	tensor<int32, [4]> value_23_perm_0 = const()[name = tensor<string, []>("value_23_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_1188_to_fp16 = const()[name = tensor<string, []>("op_1188_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_47_cast_fp16 = mul(x = var_1174_cast_fp16, y = var_1188_to_fp16)[name = tensor<string, []>("q_h_47_cast_fp16")];
	tensor<bool, []> scores_45_transpose_x_0 = const()[name = tensor<string, []>("scores_45_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_45_transpose_y_0 = const()[name = tensor<string, []>("scores_45_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_86_perm_0 = const()[name = tensor<string, []>("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_87_perm_0 = const()[name = tensor<string, []>("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_87 = transpose(perm = transpose_87_perm_0, x = var_1183_cast_fp16)[name = tensor<string, []>("transpose_122")];
	tensor<fp16, [1, 4, 128, 128]> transpose_86 = transpose(perm = transpose_86_perm_0, x = q_h_47_cast_fp16)[name = tensor<string, []>("transpose_123")];
	tensor<fp16, [1, 4, 128, 512]> scores_45_cast_fp16 = matmul(transpose_x = scores_45_transpose_x_0, transpose_y = scores_45_transpose_y_0, x = transpose_86, y = transpose_87)[name = tensor<string, []>("scores_45_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_47_cast_fp16 = select(a = var_8_to_fp16, b = scores_45_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_47_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1196_cast_fp16 = softmax(axis = var_20, x = scores_47_cast_fp16)[name = tensor<string, []>("op_1196_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_329_cast_fp16 = select(a = var_9_to_fp16, b = var_1196_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_329_cast_fp16")];
	tensor<bool, []> x_167_transpose_x_0 = const()[name = tensor<string, []>("x_167_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_167_transpose_y_0 = const()[name = tensor<string, []>("x_167_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_23_cast_fp16 = transpose(perm = value_23_perm_0, x = var_1186_cast_fp16)[name = tensor<string, []>("transpose_121")];
	tensor<fp16, [1, 4, 128, 128]> x_167_cast_fp16 = matmul(transpose_x = x_167_transpose_x_0, transpose_y = x_167_transpose_y_0, x = input_329_cast_fp16, y = value_23_cast_fp16)[name = tensor<string, []>("x_167_cast_fp16")];
	tensor<int32, [4]> var_1200_perm_0 = const()[name = tensor<string, []>("op_1200_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1202 = const()[name = tensor<string, []>("op_1202"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1200_cast_fp16 = transpose(perm = var_1200_perm_0, x = x_167_cast_fp16)[name = tensor<string, []>("transpose_120")];
	tensor<fp16, [1, 128, 512]> input_331_cast_fp16 = reshape(shape = var_1202, x = var_1200_cast_fp16)[name = tensor<string, []>("input_331_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_11_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_11_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37968512))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38230720))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_11_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38231808)))];
	tensor<fp16, [1, 128, 512]> linear_59_cast_fp16 = linear(bias = d_decoders_11_src_attn_linear_out_bias_to_fp16, weight = d_decoders_11_src_attn_linear_out_weight_to_fp16_quantized, x = input_331_cast_fp16)[name = tensor<string, []>("linear_59_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_335_cast_fp16 = add(x = input_327_cast_fp16, y = linear_59_cast_fp16)[name = tensor<string, []>("input_335_cast_fp16")];
	tensor<int32, [1]> input_337_axes_0 = const()[name = tensor<string, []>("input_337_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_12_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38232896)))];
	tensor<fp16, [512]> d_decoders_12_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38233984)))];
	tensor<fp16, [1, 128, 512]> input_337_cast_fp16 = layer_norm(axes = input_337_axes_0, beta = d_decoders_12_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_norm1_weight_to_fp16, x = input_335_cast_fp16)[name = tensor<string, []>("input_337_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_12_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_12_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38235072))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39283712))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_12_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39287872)))];
	tensor<fp16, [1, 128, 2048]> linear_60_cast_fp16 = linear(bias = d_decoders_12_feed_forward_w_1_bias_to_fp16, weight = d_decoders_12_feed_forward_w_1_weight_to_fp16_quantized, x = input_337_cast_fp16)[name = tensor<string, []>("linear_60_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_341_cast_fp16 = relu(x = linear_60_cast_fp16)[name = tensor<string, []>("input_341_cast_fp16")];
	tensor<int32, [1]> input_345_axes_0 = const()[name = tensor<string, []>("input_345_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_12_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39292032)))];
	tensor<fp16, [2048]> d_decoders_12_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39296192)))];
	tensor<fp16, [1, 128, 2048]> input_345_cast_fp16 = layer_norm(axes = input_345_axes_0, beta = d_decoders_12_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_feed_forward_norm_weight_to_fp16, x = input_341_cast_fp16)[name = tensor<string, []>("input_345_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_12_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_12_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39300352))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40348992))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_12_feed_forward_w_2_weight_to_fp16_quantized, x = input_345_cast_fp16)[name = tensor<string, []>("linear_61_cast_fp16")];
	tensor<int32, [1]> inputs_49_axes_0 = const()[name = tensor<string, []>("inputs_49_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_12_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40350080)))];
	tensor<fp16, [512]> d_decoders_12_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40351168)))];
	tensor<fp16, [1, 128, 512]> inputs_49_cast_fp16 = layer_norm(axes = inputs_49_axes_0, beta = d_decoders_12_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_norm2_weight_to_fp16, x = linear_61_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_51_cast_fp16 = mul(x = inputs_49_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
	tensor<int32, [3]> x_169_perm_0 = const()[name = tensor<string, []>("x_169_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_349_pad_0 = const()[name = tensor<string, []>("input_349_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_349_mode_0 = const()[name = tensor<string, []>("input_349_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_77_to_fp16 = const()[name = tensor<string, []>("const_77_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_169_cast_fp16 = transpose(perm = x_169_perm_0, x = inputs_51_cast_fp16)[name = tensor<string, []>("transpose_119")];
	tensor<fp16, [1, 512, 138]> input_349_cast_fp16 = pad(constant_val = const_77_to_fp16, mode = input_349_mode_0, pad = input_349_pad_0, x = x_169_cast_fp16)[name = tensor<string, []>("input_349_cast_fp16")];
	tensor<string, []> x_171_pad_type_0 = const()[name = tensor<string, []>("x_171_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_171_groups_0 = const()[name = tensor<string, []>("x_171_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_171_strides_0 = const()[name = tensor<string, []>("x_171_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_171_pad_0 = const()[name = tensor<string, []>("x_171_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_171_dilations_0 = const()[name = tensor<string, []>("x_171_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_12_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_12_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40352256))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40357952))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_171_cast_fp16 = conv(dilations = x_171_dilations_0, groups = x_171_groups_0, pad = x_171_pad_0, pad_type = x_171_pad_type_0, strides = x_171_strides_0, weight = d_decoders_12_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_349_cast_fp16)[name = tensor<string, []>("x_171_cast_fp16")];
	tensor<int32, [3]> x_173_perm_0 = const()[name = tensor<string, []>("x_173_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_173_cast_fp16 = transpose(perm = x_173_perm_0, x = x_171_cast_fp16)[name = tensor<string, []>("transpose_118")];
	tensor<fp16, [1, 128, 512]> input_351_cast_fp16 = add(x = x_173_cast_fp16, y = inputs_51_cast_fp16)[name = tensor<string, []>("input_351_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_353_cast_fp16 = mul(x = input_351_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_353_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_355_cast_fp16 = add(x = input_335_cast_fp16, y = input_353_cast_fp16)[name = tensor<string, []>("input_355_cast_fp16")];
	tensor<int32, [1]> x_179_axes_0 = const()[name = tensor<string, []>("x_179_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_12_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40359040)))];
	tensor<fp16, [512]> d_decoders_12_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40360128)))];
	tensor<fp16, [1, 128, 512]> x_179_cast_fp16 = layer_norm(axes = x_179_axes_0, beta = d_decoders_12_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_norm3_weight_to_fp16, x = input_355_cast_fp16)[name = tensor<string, []>("x_179_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_12_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_12_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40361216))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40623424))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_12_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40624512)))];
	tensor<fp16, [1, 128, 512]> linear_62_cast_fp16 = linear(bias = d_decoders_12_src_attn_linear_q_bias_to_fp16, weight = d_decoders_12_src_attn_linear_q_weight_to_fp16_quantized, x = x_179_cast_fp16)[name = tensor<string, []>("linear_62_cast_fp16")];
	tensor<int32, [4]> var_1268 = const()[name = tensor<string, []>("op_1268"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_1269_cast_fp16 = reshape(shape = var_1268, x = linear_62_cast_fp16)[name = tensor<string, []>("op_1269_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_12_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_12_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40625600))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41149952))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_12_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41152064)))];
	tensor<fp16, [1, 512, 1024]> linear_63_cast_fp16 = linear(bias = d_decoders_12_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_12_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_63_cast_fp16")];
	tensor<int32, [2]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_1274_axis_0 = const()[name = tensor<string, []>("op_1274_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_1274_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1274_cast_fp16_1 = split(axis = var_1274_axis_0, split_sizes = tile_12, x = linear_63_cast_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
	tensor<int32, [4]> var_1277 = const()[name = tensor<string, []>("op_1277"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1278_cast_fp16 = reshape(shape = var_1277, x = var_1274_cast_fp16_0)[name = tensor<string, []>("op_1278_cast_fp16")];
	tensor<int32, [4]> var_1280 = const()[name = tensor<string, []>("op_1280"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1281_cast_fp16 = reshape(shape = var_1280, x = var_1274_cast_fp16_1)[name = tensor<string, []>("op_1281_cast_fp16")];
	tensor<int32, [4]> value_25_perm_0 = const()[name = tensor<string, []>("value_25_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_1283_to_fp16 = const()[name = tensor<string, []>("op_1283_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_51_cast_fp16 = mul(x = var_1269_cast_fp16, y = var_1283_to_fp16)[name = tensor<string, []>("q_h_51_cast_fp16")];
	tensor<bool, []> scores_49_transpose_x_0 = const()[name = tensor<string, []>("scores_49_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_49_transpose_y_0 = const()[name = tensor<string, []>("scores_49_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_88_perm_0 = const()[name = tensor<string, []>("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_89_perm_0 = const()[name = tensor<string, []>("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_89 = transpose(perm = transpose_89_perm_0, x = var_1278_cast_fp16)[name = tensor<string, []>("transpose_116")];
	tensor<fp16, [1, 4, 128, 128]> transpose_88 = transpose(perm = transpose_88_perm_0, x = q_h_51_cast_fp16)[name = tensor<string, []>("transpose_117")];
	tensor<fp16, [1, 4, 128, 512]> scores_49_cast_fp16 = matmul(transpose_x = scores_49_transpose_x_0, transpose_y = scores_49_transpose_y_0, x = transpose_88, y = transpose_89)[name = tensor<string, []>("scores_49_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_51_cast_fp16 = select(a = var_8_to_fp16, b = scores_49_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_51_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1291_cast_fp16 = softmax(axis = var_20, x = scores_51_cast_fp16)[name = tensor<string, []>("op_1291_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_357_cast_fp16 = select(a = var_9_to_fp16, b = var_1291_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_357_cast_fp16")];
	tensor<bool, []> x_181_transpose_x_0 = const()[name = tensor<string, []>("x_181_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_181_transpose_y_0 = const()[name = tensor<string, []>("x_181_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_25_cast_fp16 = transpose(perm = value_25_perm_0, x = var_1281_cast_fp16)[name = tensor<string, []>("transpose_115")];
	tensor<fp16, [1, 4, 128, 128]> x_181_cast_fp16 = matmul(transpose_x = x_181_transpose_x_0, transpose_y = x_181_transpose_y_0, x = input_357_cast_fp16, y = value_25_cast_fp16)[name = tensor<string, []>("x_181_cast_fp16")];
	tensor<int32, [4]> var_1295_perm_0 = const()[name = tensor<string, []>("op_1295_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1297 = const()[name = tensor<string, []>("op_1297"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1295_cast_fp16 = transpose(perm = var_1295_perm_0, x = x_181_cast_fp16)[name = tensor<string, []>("transpose_114")];
	tensor<fp16, [1, 128, 512]> input_359_cast_fp16 = reshape(shape = var_1297, x = var_1295_cast_fp16)[name = tensor<string, []>("input_359_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_12_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_12_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41154176))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41416384))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_12_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41417472)))];
	tensor<fp16, [1, 128, 512]> linear_64_cast_fp16 = linear(bias = d_decoders_12_src_attn_linear_out_bias_to_fp16, weight = d_decoders_12_src_attn_linear_out_weight_to_fp16_quantized, x = input_359_cast_fp16)[name = tensor<string, []>("linear_64_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_363_cast_fp16 = add(x = input_355_cast_fp16, y = linear_64_cast_fp16)[name = tensor<string, []>("input_363_cast_fp16")];
	tensor<int32, [1]> input_365_axes_0 = const()[name = tensor<string, []>("input_365_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_13_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41418560)))];
	tensor<fp16, [512]> d_decoders_13_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41419648)))];
	tensor<fp16, [1, 128, 512]> input_365_cast_fp16 = layer_norm(axes = input_365_axes_0, beta = d_decoders_13_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_norm1_weight_to_fp16, x = input_363_cast_fp16)[name = tensor<string, []>("input_365_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_13_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_13_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41420736))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42469376))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_13_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42473536)))];
	tensor<fp16, [1, 128, 2048]> linear_65_cast_fp16 = linear(bias = d_decoders_13_feed_forward_w_1_bias_to_fp16, weight = d_decoders_13_feed_forward_w_1_weight_to_fp16_quantized, x = input_365_cast_fp16)[name = tensor<string, []>("linear_65_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_369_cast_fp16 = relu(x = linear_65_cast_fp16)[name = tensor<string, []>("input_369_cast_fp16")];
	tensor<int32, [1]> input_373_axes_0 = const()[name = tensor<string, []>("input_373_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_13_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42477696)))];
	tensor<fp16, [2048]> d_decoders_13_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42481856)))];
	tensor<fp16, [1, 128, 2048]> input_373_cast_fp16 = layer_norm(axes = input_373_axes_0, beta = d_decoders_13_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_feed_forward_norm_weight_to_fp16, x = input_369_cast_fp16)[name = tensor<string, []>("input_373_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_13_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_13_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42486016))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43534656))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_13_feed_forward_w_2_weight_to_fp16_quantized, x = input_373_cast_fp16)[name = tensor<string, []>("linear_66_cast_fp16")];
	tensor<int32, [1]> inputs_53_axes_0 = const()[name = tensor<string, []>("inputs_53_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_13_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43535744)))];
	tensor<fp16, [512]> d_decoders_13_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43536832)))];
	tensor<fp16, [1, 128, 512]> inputs_53_cast_fp16 = layer_norm(axes = inputs_53_axes_0, beta = d_decoders_13_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_norm2_weight_to_fp16, x = linear_66_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_55_cast_fp16 = mul(x = inputs_53_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
	tensor<int32, [3]> x_183_perm_0 = const()[name = tensor<string, []>("x_183_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_377_pad_0 = const()[name = tensor<string, []>("input_377_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_377_mode_0 = const()[name = tensor<string, []>("input_377_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_83_to_fp16 = const()[name = tensor<string, []>("const_83_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_183_cast_fp16 = transpose(perm = x_183_perm_0, x = inputs_55_cast_fp16)[name = tensor<string, []>("transpose_113")];
	tensor<fp16, [1, 512, 138]> input_377_cast_fp16 = pad(constant_val = const_83_to_fp16, mode = input_377_mode_0, pad = input_377_pad_0, x = x_183_cast_fp16)[name = tensor<string, []>("input_377_cast_fp16")];
	tensor<string, []> x_185_pad_type_0 = const()[name = tensor<string, []>("x_185_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_185_groups_0 = const()[name = tensor<string, []>("x_185_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_185_strides_0 = const()[name = tensor<string, []>("x_185_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_185_pad_0 = const()[name = tensor<string, []>("x_185_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_185_dilations_0 = const()[name = tensor<string, []>("x_185_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_13_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_13_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43537920))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43543616))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_185_cast_fp16 = conv(dilations = x_185_dilations_0, groups = x_185_groups_0, pad = x_185_pad_0, pad_type = x_185_pad_type_0, strides = x_185_strides_0, weight = d_decoders_13_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_377_cast_fp16)[name = tensor<string, []>("x_185_cast_fp16")];
	tensor<int32, [3]> x_187_perm_0 = const()[name = tensor<string, []>("x_187_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_187_cast_fp16 = transpose(perm = x_187_perm_0, x = x_185_cast_fp16)[name = tensor<string, []>("transpose_112")];
	tensor<fp16, [1, 128, 512]> input_379_cast_fp16 = add(x = x_187_cast_fp16, y = inputs_55_cast_fp16)[name = tensor<string, []>("input_379_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_381_cast_fp16 = mul(x = input_379_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_381_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_383_cast_fp16 = add(x = input_363_cast_fp16, y = input_381_cast_fp16)[name = tensor<string, []>("input_383_cast_fp16")];
	tensor<int32, [1]> x_193_axes_0 = const()[name = tensor<string, []>("x_193_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_13_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43544704)))];
	tensor<fp16, [512]> d_decoders_13_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43545792)))];
	tensor<fp16, [1, 128, 512]> x_193_cast_fp16 = layer_norm(axes = x_193_axes_0, beta = d_decoders_13_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_norm3_weight_to_fp16, x = input_383_cast_fp16)[name = tensor<string, []>("x_193_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_13_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_13_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43546880))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43809088))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_13_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43810176)))];
	tensor<fp16, [1, 128, 512]> linear_67_cast_fp16 = linear(bias = d_decoders_13_src_attn_linear_q_bias_to_fp16, weight = d_decoders_13_src_attn_linear_q_weight_to_fp16_quantized, x = x_193_cast_fp16)[name = tensor<string, []>("linear_67_cast_fp16")];
	tensor<int32, [4]> var_1363 = const()[name = tensor<string, []>("op_1363"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_1364_cast_fp16 = reshape(shape = var_1363, x = linear_67_cast_fp16)[name = tensor<string, []>("op_1364_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_13_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_13_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43811264))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44335616))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_13_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44337728)))];
	tensor<fp16, [1, 512, 1024]> linear_68_cast_fp16 = linear(bias = d_decoders_13_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_13_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_68_cast_fp16")];
	tensor<int32, [2]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_1369_axis_0 = const()[name = tensor<string, []>("op_1369_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_1369_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1369_cast_fp16_1 = split(axis = var_1369_axis_0, split_sizes = tile_13, x = linear_68_cast_fp16)[name = tensor<string, []>("op_1369_cast_fp16")];
	tensor<int32, [4]> var_1372 = const()[name = tensor<string, []>("op_1372"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1373_cast_fp16 = reshape(shape = var_1372, x = var_1369_cast_fp16_0)[name = tensor<string, []>("op_1373_cast_fp16")];
	tensor<int32, [4]> var_1375 = const()[name = tensor<string, []>("op_1375"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1376_cast_fp16 = reshape(shape = var_1375, x = var_1369_cast_fp16_1)[name = tensor<string, []>("op_1376_cast_fp16")];
	tensor<int32, [4]> value_27_perm_0 = const()[name = tensor<string, []>("value_27_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_1378_to_fp16 = const()[name = tensor<string, []>("op_1378_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_55_cast_fp16 = mul(x = var_1364_cast_fp16, y = var_1378_to_fp16)[name = tensor<string, []>("q_h_55_cast_fp16")];
	tensor<bool, []> scores_53_transpose_x_0 = const()[name = tensor<string, []>("scores_53_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_53_transpose_y_0 = const()[name = tensor<string, []>("scores_53_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_90_perm_0 = const()[name = tensor<string, []>("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_91_perm_0 = const()[name = tensor<string, []>("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_91 = transpose(perm = transpose_91_perm_0, x = var_1373_cast_fp16)[name = tensor<string, []>("transpose_110")];
	tensor<fp16, [1, 4, 128, 128]> transpose_90 = transpose(perm = transpose_90_perm_0, x = q_h_55_cast_fp16)[name = tensor<string, []>("transpose_111")];
	tensor<fp16, [1, 4, 128, 512]> scores_53_cast_fp16 = matmul(transpose_x = scores_53_transpose_x_0, transpose_y = scores_53_transpose_y_0, x = transpose_90, y = transpose_91)[name = tensor<string, []>("scores_53_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_55_cast_fp16 = select(a = var_8_to_fp16, b = scores_53_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_55_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1386_cast_fp16 = softmax(axis = var_20, x = scores_55_cast_fp16)[name = tensor<string, []>("op_1386_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_385_cast_fp16 = select(a = var_9_to_fp16, b = var_1386_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_385_cast_fp16")];
	tensor<bool, []> x_195_transpose_x_0 = const()[name = tensor<string, []>("x_195_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_195_transpose_y_0 = const()[name = tensor<string, []>("x_195_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_27_cast_fp16 = transpose(perm = value_27_perm_0, x = var_1376_cast_fp16)[name = tensor<string, []>("transpose_109")];
	tensor<fp16, [1, 4, 128, 128]> x_195_cast_fp16 = matmul(transpose_x = x_195_transpose_x_0, transpose_y = x_195_transpose_y_0, x = input_385_cast_fp16, y = value_27_cast_fp16)[name = tensor<string, []>("x_195_cast_fp16")];
	tensor<int32, [4]> var_1390_perm_0 = const()[name = tensor<string, []>("op_1390_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1392 = const()[name = tensor<string, []>("op_1392"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1390_cast_fp16 = transpose(perm = var_1390_perm_0, x = x_195_cast_fp16)[name = tensor<string, []>("transpose_108")];
	tensor<fp16, [1, 128, 512]> input_387_cast_fp16 = reshape(shape = var_1392, x = var_1390_cast_fp16)[name = tensor<string, []>("input_387_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_13_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_13_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44339840))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44602048))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_13_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44603136)))];
	tensor<fp16, [1, 128, 512]> linear_69_cast_fp16 = linear(bias = d_decoders_13_src_attn_linear_out_bias_to_fp16, weight = d_decoders_13_src_attn_linear_out_weight_to_fp16_quantized, x = input_387_cast_fp16)[name = tensor<string, []>("linear_69_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_391_cast_fp16 = add(x = input_383_cast_fp16, y = linear_69_cast_fp16)[name = tensor<string, []>("input_391_cast_fp16")];
	tensor<int32, [1]> input_393_axes_0 = const()[name = tensor<string, []>("input_393_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_14_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44604224)))];
	tensor<fp16, [512]> d_decoders_14_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44605312)))];
	tensor<fp16, [1, 128, 512]> input_393_cast_fp16 = layer_norm(axes = input_393_axes_0, beta = d_decoders_14_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_norm1_weight_to_fp16, x = input_391_cast_fp16)[name = tensor<string, []>("input_393_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_14_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_14_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44606400))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45655040))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_14_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45659200)))];
	tensor<fp16, [1, 128, 2048]> linear_70_cast_fp16 = linear(bias = d_decoders_14_feed_forward_w_1_bias_to_fp16, weight = d_decoders_14_feed_forward_w_1_weight_to_fp16_quantized, x = input_393_cast_fp16)[name = tensor<string, []>("linear_70_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_397_cast_fp16 = relu(x = linear_70_cast_fp16)[name = tensor<string, []>("input_397_cast_fp16")];
	tensor<int32, [1]> input_401_axes_0 = const()[name = tensor<string, []>("input_401_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_14_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45663360)))];
	tensor<fp16, [2048]> d_decoders_14_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45667520)))];
	tensor<fp16, [1, 128, 2048]> input_401_cast_fp16 = layer_norm(axes = input_401_axes_0, beta = d_decoders_14_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_feed_forward_norm_weight_to_fp16, x = input_397_cast_fp16)[name = tensor<string, []>("input_401_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_14_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_14_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45671680))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46720320))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_14_feed_forward_w_2_weight_to_fp16_quantized, x = input_401_cast_fp16)[name = tensor<string, []>("linear_71_cast_fp16")];
	tensor<int32, [1]> inputs_57_axes_0 = const()[name = tensor<string, []>("inputs_57_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_14_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46721408)))];
	tensor<fp16, [512]> d_decoders_14_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46722496)))];
	tensor<fp16, [1, 128, 512]> inputs_57_cast_fp16 = layer_norm(axes = inputs_57_axes_0, beta = d_decoders_14_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_norm2_weight_to_fp16, x = linear_71_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_59_cast_fp16 = mul(x = inputs_57_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
	tensor<int32, [3]> x_197_perm_0 = const()[name = tensor<string, []>("x_197_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_405_pad_0 = const()[name = tensor<string, []>("input_405_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_405_mode_0 = const()[name = tensor<string, []>("input_405_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_89_to_fp16 = const()[name = tensor<string, []>("const_89_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_197_cast_fp16 = transpose(perm = x_197_perm_0, x = inputs_59_cast_fp16)[name = tensor<string, []>("transpose_107")];
	tensor<fp16, [1, 512, 138]> input_405_cast_fp16 = pad(constant_val = const_89_to_fp16, mode = input_405_mode_0, pad = input_405_pad_0, x = x_197_cast_fp16)[name = tensor<string, []>("input_405_cast_fp16")];
	tensor<string, []> x_199_pad_type_0 = const()[name = tensor<string, []>("x_199_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_199_groups_0 = const()[name = tensor<string, []>("x_199_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_199_strides_0 = const()[name = tensor<string, []>("x_199_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_199_pad_0 = const()[name = tensor<string, []>("x_199_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_199_dilations_0 = const()[name = tensor<string, []>("x_199_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_14_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_14_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46723584))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46729280))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_199_cast_fp16 = conv(dilations = x_199_dilations_0, groups = x_199_groups_0, pad = x_199_pad_0, pad_type = x_199_pad_type_0, strides = x_199_strides_0, weight = d_decoders_14_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_405_cast_fp16)[name = tensor<string, []>("x_199_cast_fp16")];
	tensor<int32, [3]> x_201_perm_0 = const()[name = tensor<string, []>("x_201_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_201_cast_fp16 = transpose(perm = x_201_perm_0, x = x_199_cast_fp16)[name = tensor<string, []>("transpose_106")];
	tensor<fp16, [1, 128, 512]> input_407_cast_fp16 = add(x = x_201_cast_fp16, y = inputs_59_cast_fp16)[name = tensor<string, []>("input_407_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_409_cast_fp16 = mul(x = input_407_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_409_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_411_cast_fp16 = add(x = input_391_cast_fp16, y = input_409_cast_fp16)[name = tensor<string, []>("input_411_cast_fp16")];
	tensor<int32, [1]> x_207_axes_0 = const()[name = tensor<string, []>("x_207_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_14_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46730368)))];
	tensor<fp16, [512]> d_decoders_14_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46731456)))];
	tensor<fp16, [1, 128, 512]> x_207_cast_fp16 = layer_norm(axes = x_207_axes_0, beta = d_decoders_14_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_norm3_weight_to_fp16, x = input_411_cast_fp16)[name = tensor<string, []>("x_207_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_14_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_14_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46732544))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46994752))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_14_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46995840)))];
	tensor<fp16, [1, 128, 512]> linear_72_cast_fp16 = linear(bias = d_decoders_14_src_attn_linear_q_bias_to_fp16, weight = d_decoders_14_src_attn_linear_q_weight_to_fp16_quantized, x = x_207_cast_fp16)[name = tensor<string, []>("linear_72_cast_fp16")];
	tensor<int32, [4]> var_1458 = const()[name = tensor<string, []>("op_1458"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_1459_cast_fp16 = reshape(shape = var_1458, x = linear_72_cast_fp16)[name = tensor<string, []>("op_1459_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_14_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_14_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46996928))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47521280))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_14_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47523392)))];
	tensor<fp16, [1, 512, 1024]> linear_73_cast_fp16 = linear(bias = d_decoders_14_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_14_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_73_cast_fp16")];
	tensor<int32, [2]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_1464_axis_0 = const()[name = tensor<string, []>("op_1464_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_1464_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1464_cast_fp16_1 = split(axis = var_1464_axis_0, split_sizes = tile_14, x = linear_73_cast_fp16)[name = tensor<string, []>("op_1464_cast_fp16")];
	tensor<int32, [4]> var_1467 = const()[name = tensor<string, []>("op_1467"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1468_cast_fp16 = reshape(shape = var_1467, x = var_1464_cast_fp16_0)[name = tensor<string, []>("op_1468_cast_fp16")];
	tensor<int32, [4]> var_1470 = const()[name = tensor<string, []>("op_1470"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1471_cast_fp16 = reshape(shape = var_1470, x = var_1464_cast_fp16_1)[name = tensor<string, []>("op_1471_cast_fp16")];
	tensor<int32, [4]> value_29_perm_0 = const()[name = tensor<string, []>("value_29_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_1473_to_fp16 = const()[name = tensor<string, []>("op_1473_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_59_cast_fp16 = mul(x = var_1459_cast_fp16, y = var_1473_to_fp16)[name = tensor<string, []>("q_h_59_cast_fp16")];
	tensor<bool, []> scores_57_transpose_x_0 = const()[name = tensor<string, []>("scores_57_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_57_transpose_y_0 = const()[name = tensor<string, []>("scores_57_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_92_perm_0 = const()[name = tensor<string, []>("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_93_perm_0 = const()[name = tensor<string, []>("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_93 = transpose(perm = transpose_93_perm_0, x = var_1468_cast_fp16)[name = tensor<string, []>("transpose_104")];
	tensor<fp16, [1, 4, 128, 128]> transpose_92 = transpose(perm = transpose_92_perm_0, x = q_h_59_cast_fp16)[name = tensor<string, []>("transpose_105")];
	tensor<fp16, [1, 4, 128, 512]> scores_57_cast_fp16 = matmul(transpose_x = scores_57_transpose_x_0, transpose_y = scores_57_transpose_y_0, x = transpose_92, y = transpose_93)[name = tensor<string, []>("scores_57_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_59_cast_fp16 = select(a = var_8_to_fp16, b = scores_57_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_59_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1481_cast_fp16 = softmax(axis = var_20, x = scores_59_cast_fp16)[name = tensor<string, []>("op_1481_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_413_cast_fp16 = select(a = var_9_to_fp16, b = var_1481_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_413_cast_fp16")];
	tensor<bool, []> x_209_transpose_x_0 = const()[name = tensor<string, []>("x_209_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_209_transpose_y_0 = const()[name = tensor<string, []>("x_209_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_29_cast_fp16 = transpose(perm = value_29_perm_0, x = var_1471_cast_fp16)[name = tensor<string, []>("transpose_103")];
	tensor<fp16, [1, 4, 128, 128]> x_209_cast_fp16 = matmul(transpose_x = x_209_transpose_x_0, transpose_y = x_209_transpose_y_0, x = input_413_cast_fp16, y = value_29_cast_fp16)[name = tensor<string, []>("x_209_cast_fp16")];
	tensor<int32, [4]> var_1485_perm_0 = const()[name = tensor<string, []>("op_1485_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1487 = const()[name = tensor<string, []>("op_1487"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1485_cast_fp16 = transpose(perm = var_1485_perm_0, x = x_209_cast_fp16)[name = tensor<string, []>("transpose_102")];
	tensor<fp16, [1, 128, 512]> input_415_cast_fp16 = reshape(shape = var_1487, x = var_1485_cast_fp16)[name = tensor<string, []>("input_415_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_14_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_14_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47525504))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47787712))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_14_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47788800)))];
	tensor<fp16, [1, 128, 512]> linear_74_cast_fp16 = linear(bias = d_decoders_14_src_attn_linear_out_bias_to_fp16, weight = d_decoders_14_src_attn_linear_out_weight_to_fp16_quantized, x = input_415_cast_fp16)[name = tensor<string, []>("linear_74_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_419_cast_fp16 = add(x = input_411_cast_fp16, y = linear_74_cast_fp16)[name = tensor<string, []>("input_419_cast_fp16")];
	tensor<int32, [1]> input_421_axes_0 = const()[name = tensor<string, []>("input_421_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_15_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47789888)))];
	tensor<fp16, [512]> d_decoders_15_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47790976)))];
	tensor<fp16, [1, 128, 512]> input_421_cast_fp16 = layer_norm(axes = input_421_axes_0, beta = d_decoders_15_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_norm1_weight_to_fp16, x = input_419_cast_fp16)[name = tensor<string, []>("input_421_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders_15_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_15_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47792064))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48840704))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders_15_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48844864)))];
	tensor<fp16, [1, 128, 2048]> linear_75_cast_fp16 = linear(bias = d_decoders_15_feed_forward_w_1_bias_to_fp16, weight = d_decoders_15_feed_forward_w_1_weight_to_fp16_quantized, x = input_421_cast_fp16)[name = tensor<string, []>("linear_75_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_425_cast_fp16 = relu(x = linear_75_cast_fp16)[name = tensor<string, []>("input_425_cast_fp16")];
	tensor<int32, [1]> input_429_axes_0 = const()[name = tensor<string, []>("input_429_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders_15_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48849024)))];
	tensor<fp16, [2048]> d_decoders_15_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48853184)))];
	tensor<fp16, [1, 128, 2048]> input_429_cast_fp16 = layer_norm(axes = input_429_axes_0, beta = d_decoders_15_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_feed_forward_norm_weight_to_fp16, x = input_425_cast_fp16)[name = tensor<string, []>("input_429_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders_15_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_15_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48857344))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49905984))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_15_feed_forward_w_2_weight_to_fp16_quantized, x = input_429_cast_fp16)[name = tensor<string, []>("linear_76_cast_fp16")];
	tensor<int32, [1]> inputs_61_axes_0 = const()[name = tensor<string, []>("inputs_61_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_15_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49907072)))];
	tensor<fp16, [512]> d_decoders_15_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49908160)))];
	tensor<fp16, [1, 128, 512]> inputs_61_cast_fp16 = layer_norm(axes = inputs_61_axes_0, beta = d_decoders_15_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_norm2_weight_to_fp16, x = linear_76_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
	tensor<fp16, [1, 128, 512]> inputs_cast_fp16 = mul(x = inputs_61_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
	tensor<int32, [3]> x_211_perm_0 = const()[name = tensor<string, []>("x_211_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<int32, [6]> input_433_pad_0 = const()[name = tensor<string, []>("input_433_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
	tensor<string, []> input_433_mode_0 = const()[name = tensor<string, []>("input_433_mode_0"), val = tensor<string, []>("constant")];
	tensor<fp16, []> const_95_to_fp16 = const()[name = tensor<string, []>("const_95_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
	tensor<fp16, [1, 512, 128]> x_211_cast_fp16 = transpose(perm = x_211_perm_0, x = inputs_cast_fp16)[name = tensor<string, []>("transpose_101")];
	tensor<fp16, [1, 512, 138]> input_433_cast_fp16 = pad(constant_val = const_95_to_fp16, mode = input_433_mode_0, pad = input_433_pad_0, x = x_211_cast_fp16)[name = tensor<string, []>("input_433_cast_fp16")];
	tensor<string, []> x_213_pad_type_0 = const()[name = tensor<string, []>("x_213_pad_type_0"), val = tensor<string, []>("valid")];
	tensor<int32, []> x_213_groups_0 = const()[name = tensor<string, []>("x_213_groups_0"), val = tensor<int32, []>(512)];
	tensor<int32, [1]> x_213_strides_0 = const()[name = tensor<string, []>("x_213_strides_0"), val = tensor<int32, [1]>([1])];
	tensor<int32, [2]> x_213_pad_0 = const()[name = tensor<string, []>("x_213_pad_0"), val = tensor<int32, [2]>([0, 0])];
	tensor<int32, [1]> x_213_dilations_0 = const()[name = tensor<string, []>("x_213_dilations_0"), val = tensor<int32, [1]>([1])];
	tensor<fp16, [512, 1, 11]> d_decoders_15_self_attn_fsmn_block_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_15_self_attn_fsmn_block_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49909248))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49914944))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 512, 128]> x_213_cast_fp16 = conv(dilations = x_213_dilations_0, groups = x_213_groups_0, pad = x_213_pad_0, pad_type = x_213_pad_type_0, strides = x_213_strides_0, weight = d_decoders_15_self_attn_fsmn_block_weight_to_fp16_quantized, x = input_433_cast_fp16)[name = tensor<string, []>("x_213_cast_fp16")];
	tensor<int32, [3]> x_215_perm_0 = const()[name = tensor<string, []>("x_215_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
	tensor<fp16, [1, 128, 512]> x_215_cast_fp16 = transpose(perm = x_215_perm_0, x = x_213_cast_fp16)[name = tensor<string, []>("transpose_100")];
	tensor<fp16, [1, 128, 512]> input_435_cast_fp16 = add(x = x_215_cast_fp16, y = inputs_cast_fp16)[name = tensor<string, []>("input_435_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_437_cast_fp16 = mul(x = input_435_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_437_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_439_cast_fp16 = add(x = input_419_cast_fp16, y = input_437_cast_fp16)[name = tensor<string, []>("input_439_cast_fp16")];
	tensor<int32, [1]> x_221_axes_0 = const()[name = tensor<string, []>("x_221_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders_15_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49916032)))];
	tensor<fp16, [512]> d_decoders_15_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49917120)))];
	tensor<fp16, [1, 128, 512]> x_221_cast_fp16 = layer_norm(axes = x_221_axes_0, beta = d_decoders_15_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_norm3_weight_to_fp16, x = input_439_cast_fp16)[name = tensor<string, []>("x_221_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_15_src_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_15_src_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49918208))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50180416))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_15_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50181504)))];
	tensor<fp16, [1, 128, 512]> linear_77_cast_fp16 = linear(bias = d_decoders_15_src_attn_linear_q_bias_to_fp16, weight = d_decoders_15_src_attn_linear_q_weight_to_fp16_quantized, x = x_221_cast_fp16)[name = tensor<string, []>("linear_77_cast_fp16")];
	tensor<int32, [4]> var_1553 = const()[name = tensor<string, []>("op_1553"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 128, 4, 128]> var_1554_cast_fp16 = reshape(shape = var_1553, x = linear_77_cast_fp16)[name = tensor<string, []>("op_1554_cast_fp16")];
	tensor<fp16, [1024, 512]> d_decoders_15_src_attn_linear_k_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_15_src_attn_linear_k_v_weight_to_fp16_quantized"), quantized_data = tensor<int8, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50182592))), scale = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50706944))), zero_point = tensor<int8, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2920896)))];
	tensor<fp16, [1024]> d_decoders_15_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50709056)))];
	tensor<fp16, [1, 512, 1024]> linear_78_cast_fp16 = linear(bias = d_decoders_15_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_15_src_attn_linear_k_v_weight_to_fp16_quantized, x = enc_to_fp16)[name = tensor<string, []>("linear_78_cast_fp16")];
	tensor<int32, [2]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [2]>([512, 512])];
	tensor<int32, []> var_1559_axis_0 = const()[name = tensor<string, []>("op_1559_axis_0"), val = tensor<int32, []>(-1)];
	tensor<fp16, [1, 512, 512]> var_1559_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1559_cast_fp16_1 = split(axis = var_1559_axis_0, split_sizes = tile_15, x = linear_78_cast_fp16)[name = tensor<string, []>("op_1559_cast_fp16")];
	tensor<int32, [4]> var_1562 = const()[name = tensor<string, []>("op_1562"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1563_cast_fp16 = reshape(shape = var_1562, x = var_1559_cast_fp16_0)[name = tensor<string, []>("op_1563_cast_fp16")];
	tensor<int32, [4]> var_1565 = const()[name = tensor<string, []>("op_1565"), val = tensor<int32, [4]>([1, -1, 4, 128])];
	tensor<fp16, [1, 512, 4, 128]> var_1566_cast_fp16 = reshape(shape = var_1565, x = var_1559_cast_fp16_1)[name = tensor<string, []>("op_1566_cast_fp16")];
	tensor<int32, [4]> value_perm_0 = const()[name = tensor<string, []>("value_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<fp16, []> var_1568_to_fp16 = const()[name = tensor<string, []>("op_1568_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
	tensor<fp16, [1, 128, 4, 128]> q_h_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1568_to_fp16)[name = tensor<string, []>("q_h_cast_fp16")];
	tensor<bool, []> scores_61_transpose_x_0 = const()[name = tensor<string, []>("scores_61_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> scores_61_transpose_y_0 = const()[name = tensor<string, []>("scores_61_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<int32, [4]> transpose_94_perm_0 = const()[name = tensor<string, []>("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
	tensor<int32, [4]> transpose_95_perm_0 = const()[name = tensor<string, []>("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
	tensor<fp16, [1, 4, 128, 512]> transpose_95 = transpose(perm = transpose_95_perm_0, x = var_1563_cast_fp16)[name = tensor<string, []>("transpose_98")];
	tensor<fp16, [1, 4, 128, 128]> transpose_94 = transpose(perm = transpose_94_perm_0, x = q_h_cast_fp16)[name = tensor<string, []>("transpose_99")];
	tensor<fp16, [1, 4, 128, 512]> scores_61_cast_fp16 = matmul(transpose_x = scores_61_transpose_x_0, transpose_y = scores_61_transpose_y_0, x = transpose_94, y = transpose_95)[name = tensor<string, []>("scores_61_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> scores_cast_fp16 = select(a = var_8_to_fp16, b = scores_61_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> var_1576_cast_fp16 = softmax(axis = var_20, x = scores_cast_fp16)[name = tensor<string, []>("op_1576_cast_fp16")];
	tensor<fp16, [1, 4, 128, 512]> input_441_cast_fp16 = select(a = var_9_to_fp16, b = var_1576_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_441_cast_fp16")];
	tensor<bool, []> x_transpose_x_0 = const()[name = tensor<string, []>("x_transpose_x_0"), val = tensor<bool, []>(false)];
	tensor<bool, []> x_transpose_y_0 = const()[name = tensor<string, []>("x_transpose_y_0"), val = tensor<bool, []>(false)];
	tensor<fp16, [1, 4, 512, 128]> value_cast_fp16 = transpose(perm = value_perm_0, x = var_1566_cast_fp16)[name = tensor<string, []>("transpose_97")];
	tensor<fp16, [1, 4, 128, 128]> x_cast_fp16 = matmul(transpose_x = x_transpose_x_0, transpose_y = x_transpose_y_0, x = input_441_cast_fp16, y = value_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
	tensor<int32, [4]> var_1580_perm_0 = const()[name = tensor<string, []>("op_1580_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
	tensor<int32, [3]> var_1582 = const()[name = tensor<string, []>("op_1582"), val = tensor<int32, [3]>([1, -1, 512])];
	tensor<fp16, [1, 128, 4, 128]> var_1580_cast_fp16 = transpose(perm = var_1580_perm_0, x = x_cast_fp16)[name = tensor<string, []>("transpose_96")];
	tensor<fp16, [1, 128, 512]> input_443_cast_fp16 = reshape(shape = var_1582, x = var_1580_cast_fp16)[name = tensor<string, []>("input_443_cast_fp16")];
	tensor<fp16, [512, 512]> d_decoders_15_src_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders_15_src_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50711168))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50973376))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [512]> d_decoders_15_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50974464)))];
	tensor<fp16, [1, 128, 512]> linear_79_cast_fp16 = linear(bias = d_decoders_15_src_attn_linear_out_bias_to_fp16, weight = d_decoders_15_src_attn_linear_out_weight_to_fp16_quantized, x = input_443_cast_fp16)[name = tensor<string, []>("linear_79_cast_fp16")];
	tensor<fp16, [1, 128, 512]> input_447_cast_fp16 = add(x = input_439_cast_fp16, y = linear_79_cast_fp16)[name = tensor<string, []>("input_447_cast_fp16")];
	tensor<int32, [1]> input_449_axes_0 = const()[name = tensor<string, []>("input_449_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_decoders3_0_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50975552)))];
	tensor<fp16, [512]> d_decoders3_0_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50976640)))];
	tensor<fp16, [1, 128, 512]> input_449_cast_fp16 = layer_norm(axes = input_449_axes_0, beta = d_decoders3_0_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders3_0_norm1_weight_to_fp16, x = input_447_cast_fp16)[name = tensor<string, []>("input_449_cast_fp16")];
	tensor<fp16, [2048, 512]> d_decoders3_0_feed_forward_w_1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders3_0_feed_forward_w_1_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50977728))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52026368))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050880)))];
	tensor<fp16, [2048]> d_decoders3_0_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52030528)))];
	tensor<fp16, [1, 128, 2048]> linear_80_cast_fp16 = linear(bias = d_decoders3_0_feed_forward_w_1_bias_to_fp16, weight = d_decoders3_0_feed_forward_w_1_weight_to_fp16_quantized, x = input_449_cast_fp16)[name = tensor<string, []>("linear_80_cast_fp16")];
	tensor<fp16, [1, 128, 2048]> input_453_cast_fp16 = relu(x = linear_80_cast_fp16)[name = tensor<string, []>("input_453_cast_fp16")];
	tensor<int32, [1]> input_457_axes_0 = const()[name = tensor<string, []>("input_457_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [2048]> d_decoders3_0_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52034688)))];
	tensor<fp16, [2048]> d_decoders3_0_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52038848)))];
	tensor<fp16, [1, 128, 2048]> input_457_cast_fp16 = layer_norm(axes = input_457_axes_0, beta = d_decoders3_0_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders3_0_feed_forward_norm_weight_to_fp16, x = input_453_cast_fp16)[name = tensor<string, []>("input_457_cast_fp16")];
	tensor<fp16, [512, 2048]> d_decoders3_0_feed_forward_w_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_decoders3_0_feed_forward_w_2_weight_to_fp16_quantized"), quantized_data = tensor<int8, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52043008))), scale = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53091648))), zero_point = tensor<int8, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2118272)))];
	tensor<fp16, [1, 128, 512]> linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders3_0_feed_forward_w_2_weight_to_fp16_quantized, x = input_457_cast_fp16)[name = tensor<string, []>("linear_81_cast_fp16")];
	tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
	tensor<fp16, [512]> d_after_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_after_norm_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53092736)))];
	tensor<fp16, [512]> d_after_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_after_norm_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53093824)))];
	tensor<fp16, [1, 128, 512]> input_cast_fp16 = layer_norm(axes = input_axes_0, beta = d_after_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_after_norm_weight_to_fp16, x = linear_81_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
	tensor<fp16, [8404, 512]> d_output_layer_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("d_output_layer_weight_to_fp16_quantized"), quantized_data = tensor<int8, [8404, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53094912))), scale = tensor<fp16, [8404]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57406336))), zero_point = tensor<int8, [8404]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57397824)))];
	tensor<fp16, [8404]> d_output_layer_bias_to_fp16 = const()[name = tensor<string, []>("d_output_layer_bias_to_fp16"), val = tensor<fp16, [8404]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57423232)))];
	tensor<fp16, [1, 128, 8404]> logits = linear(bias = d_output_layer_bias_to_fp16, weight = d_output_layer_weight_to_fp16_quantized, x = input_cast_fp16)[name = tensor<string, []>("linear_82_cast_fp16")];
	} -> (logits);
	}