alexwengg's picture
Add Paraformer-large zh CoreML (preprocessor + encoder fp16/ANE + decoder fp16/ANE + vocab + card)
d9a096e verified
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
{
func main<ios17>(tensor<fp32, [1, 128, 512]> ac, tensor<int32, [1]> elen, tensor<fp32, [1, 512, 512]> enc, tensor<int32, [1]> tn) {
tensor<int32, []> var_20 = const()[name = tensor<string, []>("op_20"), val = tensor<int32, []>(-1)];
tensor<int32, [128]> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<int32, [128]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127])];
tensor<int32, [1]> matrix_1_axes_0 = const()[name = tensor<string, []>("matrix_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<int32, [1, 1]> matrix_1 = expand_dims(axes = matrix_1_axes_0, x = tn)[name = tensor<string, []>("matrix_1")];
tensor<bool, [1, 128]> mask_1 = less(x = const_1, y = matrix_1)[name = tensor<string, []>("mask_1")];
tensor<int32, [1]> mask_9_axes_0 = const()[name = tensor<string, []>("mask_9_axes_0"), val = tensor<int32, [1]>([2])];
tensor<string, []> cast_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("cast_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [1, 128]> mask_1_to_fp16 = cast(dtype = cast_0_to_fp16_dtype_0, x = mask_1)[name = tensor<string, []>("cast_88")];
tensor<fp16, [1, 128, 1]> mask_9_cast_fp16 = expand_dims(axes = mask_9_axes_0, x = mask_1_to_fp16)[name = tensor<string, []>("mask_9_cast_fp16")];
tensor<int32, [512]> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<int32, [512]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])];
tensor<int32, [1]> matrix_axes_0 = const()[name = tensor<string, []>("matrix_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<int32, [1, 1]> matrix = expand_dims(axes = matrix_axes_0, x = elen)[name = tensor<string, []>("matrix")];
tensor<bool, [1, 512]> mask_5 = less(x = const_3, y = matrix)[name = tensor<string, []>("mask_5")];
tensor<int32, [1]> var_51_axes_0 = const()[name = tensor<string, []>("op_51_axes_0"), val = tensor<int32, [1]>([1])];
tensor<string, []> cast_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("cast_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [1, 512]> mask_5_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = mask_5)[name = tensor<string, []>("cast_87")];
tensor<fp16, [1, 1, 512]> var_51_cast_fp16 = expand_dims(axes = var_51_axes_0, x = mask_5_to_fp16)[name = tensor<string, []>("op_51_cast_fp16")];
tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<string, []> ac_to_fp16_dtype_0 = const()[name = tensor<string, []>("ac_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> d_decoders_0_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
tensor<fp16, [512]> d_decoders_0_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1152)))];
tensor<fp16, []> var_15_to_fp16 = const()[name = tensor<string, []>("op_15_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
tensor<fp16, [1, 128, 512]> ac_to_fp16 = cast(dtype = ac_to_fp16_dtype_0, x = ac)[name = tensor<string, []>("cast_86")];
tensor<fp16, [1, 128, 512]> input_1_cast_fp16 = layer_norm(axes = input_1_axes_0, beta = d_decoders_0_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_norm1_weight_to_fp16, x = ac_to_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_0_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2240)))];
tensor<fp16, [2048]> d_decoders_0_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099456)))];
tensor<fp16, [1, 128, 2048]> linear_0_cast_fp16 = linear(bias = d_decoders_0_feed_forward_w_1_bias_to_fp16, weight = d_decoders_0_feed_forward_w_1_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_5_cast_fp16 = relu(x = linear_0_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
tensor<int32, [1]> input_9_axes_0 = const()[name = tensor<string, []>("input_9_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_0_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2103616)))];
tensor<fp16, [2048]> d_decoders_0_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2107776)))];
tensor<fp16, [1, 128, 2048]> input_9_cast_fp16 = layer_norm(axes = input_9_axes_0, beta = d_decoders_0_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_feed_forward_norm_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_0_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2111936)))];
tensor<fp16, [512]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4209152)))];
tensor<fp16, [1, 128, 512]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_0_feed_forward_w_2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_0_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4210240)))];
tensor<fp16, [512]> d_decoders_0_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4211328)))];
tensor<fp16, [1, 128, 512]> inputs_1_cast_fp16 = layer_norm(axes = inputs_1_axes_0, beta = d_decoders_0_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_norm2_weight_to_fp16, x = linear_1_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_3_cast_fp16 = mul(x = inputs_1_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
tensor<int32, [3]> x_1_perm_0 = const()[name = tensor<string, []>("x_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_13_pad_0 = const()[name = tensor<string, []>("input_13_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_13_mode_0 = const()[name = tensor<string, []>("input_13_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_5_to_fp16 = const()[name = tensor<string, []>("const_5_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_1_cast_fp16 = transpose(perm = x_1_perm_0, x = inputs_3_cast_fp16)[name = tensor<string, []>("transpose_175")];
tensor<fp16, [1, 512, 138]> input_13_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = input_13_mode_0, pad = input_13_pad_0, x = x_1_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
tensor<string, []> x_3_pad_type_0 = const()[name = tensor<string, []>("x_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_3_groups_0 = const()[name = tensor<string, []>("x_3_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_3_strides_0 = const()[name = tensor<string, []>("x_3_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_3_pad_0 = const()[name = tensor<string, []>("x_3_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_3_dilations_0 = const()[name = tensor<string, []>("x_3_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_0_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4212416)))];
tensor<fp16, [1, 512, 128]> x_3_cast_fp16 = conv(dilations = x_3_dilations_0, groups = x_3_groups_0, pad = x_3_pad_0, pad_type = x_3_pad_type_0, strides = x_3_strides_0, weight = d_decoders_0_self_attn_fsmn_block_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
tensor<int32, [3]> x_5_perm_0 = const()[name = tensor<string, []>("x_5_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_5_cast_fp16 = transpose(perm = x_5_perm_0, x = x_3_cast_fp16)[name = tensor<string, []>("transpose_174")];
tensor<fp16, [1, 128, 512]> input_15_cast_fp16 = add(x = x_5_cast_fp16, y = inputs_3_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_17_cast_fp16 = mul(x = input_15_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_19_cast_fp16 = add(x = ac_to_fp16, y = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
tensor<int32, [1]> x_11_axes_0 = const()[name = tensor<string, []>("x_11_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_0_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4223744)))];
tensor<fp16, [512]> d_decoders_0_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4224832)))];
tensor<fp16, [1, 128, 512]> x_11_cast_fp16 = layer_norm(axes = x_11_axes_0, beta = d_decoders_0_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_0_norm3_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_0_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4225920)))];
tensor<fp16, [512]> d_decoders_0_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4750272)))];
tensor<fp16, [1, 128, 512]> linear_2_cast_fp16 = linear(bias = d_decoders_0_src_attn_linear_q_bias_to_fp16, weight = d_decoders_0_src_attn_linear_q_weight_to_fp16, x = x_11_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
tensor<int32, [4]> var_128 = const()[name = tensor<string, []>("op_128"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_129_cast_fp16 = reshape(shape = var_128, x = linear_2_cast_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
tensor<string, []> enc_to_fp16_dtype_0 = const()[name = tensor<string, []>("enc_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [1024, 512]> d_decoders_0_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4751360)))];
tensor<fp16, [1024]> d_decoders_0_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5800000)))];
tensor<fp16, [1, 512, 512]> enc_to_fp16 = cast(dtype = enc_to_fp16_dtype_0, x = enc)[name = tensor<string, []>("cast_85")];
tensor<fp16, [1, 512, 1024]> linear_3_cast_fp16 = linear(bias = d_decoders_0_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_0_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
tensor<int32, [2]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_134_axis_0 = const()[name = tensor<string, []>("op_134_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_134_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_134_cast_fp16_1 = split(axis = var_134_axis_0, split_sizes = tile_0, x = linear_3_cast_fp16)[name = tensor<string, []>("op_134_cast_fp16")];
tensor<int32, [4]> var_137 = const()[name = tensor<string, []>("op_137"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_138_cast_fp16 = reshape(shape = var_137, x = var_134_cast_fp16_0)[name = tensor<string, []>("op_138_cast_fp16")];
tensor<int32, [4]> var_140 = const()[name = tensor<string, []>("op_140"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_141_cast_fp16 = reshape(shape = var_140, x = var_134_cast_fp16_1)[name = tensor<string, []>("op_141_cast_fp16")];
tensor<int32, [4]> value_1_perm_0 = const()[name = tensor<string, []>("value_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_143_to_fp16 = const()[name = tensor<string, []>("op_143_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_3_cast_fp16 = mul(x = var_129_cast_fp16, y = var_143_to_fp16)[name = tensor<string, []>("q_h_3_cast_fp16")];
tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_48_perm_0 = const()[name = tensor<string, []>("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_49_perm_0 = const()[name = tensor<string, []>("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_49 = transpose(perm = transpose_49_perm_0, x = var_138_cast_fp16)[name = tensor<string, []>("transpose_171")];
tensor<fp16, [1, 4, 128, 128]> transpose_48 = transpose(perm = transpose_48_perm_0, x = q_h_3_cast_fp16)[name = tensor<string, []>("transpose_172")];
tensor<fp16, [1, 4, 128, 512]> scores_1_cast_fp16 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_48, y = transpose_49)[name = tensor<string, []>("scores_1_cast_fp16")];
tensor<int32, [1]> var_148_axes_0 = const()[name = tensor<string, []>("op_148_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [1, 1, 1, 512]> var_148_cast_fp16 = expand_dims(axes = var_148_axes_0, x = var_51_cast_fp16)[name = tensor<string, []>("op_148_cast_fp16")];
tensor<fp16, []> var_25_promoted_to_fp16 = const()[name = tensor<string, []>("op_25_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<bool, [1, 1, 1, 512]> mask_15_cast_fp16 = equal(x = var_148_cast_fp16, y = var_25_promoted_to_fp16)[name = tensor<string, []>("mask_15_cast_fp16")];
tensor<fp16, []> var_8_to_fp16 = const()[name = tensor<string, []>("op_8_to_fp16"), val = tensor<fp16, []>(-inf)];
tensor<fp16, [1, 4, 128, 512]> scores_3_cast_fp16 = select(a = var_8_to_fp16, b = scores_1_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_3_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_151_cast_fp16 = softmax(axis = var_20, x = scores_3_cast_fp16)[name = tensor<string, []>("op_151_cast_fp16")];
tensor<fp16, []> var_9_to_fp16 = const()[name = tensor<string, []>("op_9_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 4, 128, 512]> input_21_cast_fp16 = select(a = var_9_to_fp16, b = var_151_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
tensor<bool, []> x_13_transpose_x_0 = const()[name = tensor<string, []>("x_13_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_13_transpose_y_0 = const()[name = tensor<string, []>("x_13_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_1_cast_fp16 = transpose(perm = value_1_perm_0, x = var_141_cast_fp16)[name = tensor<string, []>("transpose_173")];
tensor<fp16, [1, 4, 128, 128]> x_13_cast_fp16 = matmul(transpose_x = x_13_transpose_x_0, transpose_y = x_13_transpose_y_0, x = input_21_cast_fp16, y = value_1_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")];
tensor<int32, [4]> var_155_perm_0 = const()[name = tensor<string, []>("op_155_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_157 = const()[name = tensor<string, []>("op_157"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_155_cast_fp16 = transpose(perm = var_155_perm_0, x = x_13_cast_fp16)[name = tensor<string, []>("transpose_170")];
tensor<fp16, [1, 128, 512]> input_23_cast_fp16 = reshape(shape = var_157, x = var_155_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_0_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5802112)))];
tensor<fp16, [512]> d_decoders_0_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_0_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6326464)))];
tensor<fp16, [1, 128, 512]> linear_4_cast_fp16 = linear(bias = d_decoders_0_src_attn_linear_out_bias_to_fp16, weight = d_decoders_0_src_attn_linear_out_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_27_cast_fp16 = add(x = input_19_cast_fp16, y = linear_4_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
tensor<int32, [1]> input_29_axes_0 = const()[name = tensor<string, []>("input_29_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_1_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6327552)))];
tensor<fp16, [512]> d_decoders_1_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6328640)))];
tensor<fp16, [1, 128, 512]> input_29_cast_fp16 = layer_norm(axes = input_29_axes_0, beta = d_decoders_1_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_norm1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_1_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6329728)))];
tensor<fp16, [2048]> d_decoders_1_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8426944)))];
tensor<fp16, [1, 128, 2048]> linear_5_cast_fp16 = linear(bias = d_decoders_1_feed_forward_w_1_bias_to_fp16, weight = d_decoders_1_feed_forward_w_1_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_33_cast_fp16 = relu(x = linear_5_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_1_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8431104)))];
tensor<fp16, [2048]> d_decoders_1_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8435264)))];
tensor<fp16, [1, 128, 2048]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = d_decoders_1_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_feed_forward_norm_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_1_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8439424)))];
tensor<fp16, [1, 128, 512]> linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_1_feed_forward_w_2_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
tensor<int32, [1]> inputs_5_axes_0 = const()[name = tensor<string, []>("inputs_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_1_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10536640)))];
tensor<fp16, [512]> d_decoders_1_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10537728)))];
tensor<fp16, [1, 128, 512]> inputs_5_cast_fp16 = layer_norm(axes = inputs_5_axes_0, beta = d_decoders_1_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_norm2_weight_to_fp16, x = linear_6_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_7_cast_fp16 = mul(x = inputs_5_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
tensor<int32, [3]> x_15_perm_0 = const()[name = tensor<string, []>("x_15_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_41_pad_0 = const()[name = tensor<string, []>("input_41_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_11_to_fp16 = const()[name = tensor<string, []>("const_11_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_15_cast_fp16 = transpose(perm = x_15_perm_0, x = inputs_7_cast_fp16)[name = tensor<string, []>("transpose_169")];
tensor<fp16, [1, 512, 138]> input_41_cast_fp16 = pad(constant_val = const_11_to_fp16, mode = input_41_mode_0, pad = input_41_pad_0, x = x_15_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
tensor<string, []> x_17_pad_type_0 = const()[name = tensor<string, []>("x_17_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_17_groups_0 = const()[name = tensor<string, []>("x_17_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_17_strides_0 = const()[name = tensor<string, []>("x_17_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_17_pad_0 = const()[name = tensor<string, []>("x_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_17_dilations_0 = const()[name = tensor<string, []>("x_17_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_1_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10538816)))];
tensor<fp16, [1, 512, 128]> x_17_cast_fp16 = conv(dilations = x_17_dilations_0, groups = x_17_groups_0, pad = x_17_pad_0, pad_type = x_17_pad_type_0, strides = x_17_strides_0, weight = d_decoders_1_self_attn_fsmn_block_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
tensor<int32, [3]> x_19_perm_0 = const()[name = tensor<string, []>("x_19_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_19_cast_fp16 = transpose(perm = x_19_perm_0, x = x_17_cast_fp16)[name = tensor<string, []>("transpose_168")];
tensor<fp16, [1, 128, 512]> input_43_cast_fp16 = add(x = x_19_cast_fp16, y = inputs_7_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_45_cast_fp16 = mul(x = input_43_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_47_cast_fp16 = add(x = input_27_cast_fp16, y = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
tensor<int32, [1]> x_25_axes_0 = const()[name = tensor<string, []>("x_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_1_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10550144)))];
tensor<fp16, [512]> d_decoders_1_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10551232)))];
tensor<fp16, [1, 128, 512]> x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, beta = d_decoders_1_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_1_norm3_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_1_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10552320)))];
tensor<fp16, [512]> d_decoders_1_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11076672)))];
tensor<fp16, [1, 128, 512]> linear_7_cast_fp16 = linear(bias = d_decoders_1_src_attn_linear_q_bias_to_fp16, weight = d_decoders_1_src_attn_linear_q_weight_to_fp16, x = x_25_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
tensor<int32, [4]> var_223 = const()[name = tensor<string, []>("op_223"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_224_cast_fp16 = reshape(shape = var_223, x = linear_7_cast_fp16)[name = tensor<string, []>("op_224_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_1_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11077760)))];
tensor<fp16, [1024]> d_decoders_1_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12126400)))];
tensor<fp16, [1, 512, 1024]> linear_8_cast_fp16 = linear(bias = d_decoders_1_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_1_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
tensor<int32, [2]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_229_axis_0 = const()[name = tensor<string, []>("op_229_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_229_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_229_cast_fp16_1 = split(axis = var_229_axis_0, split_sizes = tile_1, x = linear_8_cast_fp16)[name = tensor<string, []>("op_229_cast_fp16")];
tensor<int32, [4]> var_232 = const()[name = tensor<string, []>("op_232"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_233_cast_fp16 = reshape(shape = var_232, x = var_229_cast_fp16_0)[name = tensor<string, []>("op_233_cast_fp16")];
tensor<int32, [4]> var_235 = const()[name = tensor<string, []>("op_235"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_236_cast_fp16 = reshape(shape = var_235, x = var_229_cast_fp16_1)[name = tensor<string, []>("op_236_cast_fp16")];
tensor<int32, [4]> value_3_perm_0 = const()[name = tensor<string, []>("value_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_238_to_fp16 = const()[name = tensor<string, []>("op_238_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_7_cast_fp16 = mul(x = var_224_cast_fp16, y = var_238_to_fp16)[name = tensor<string, []>("q_h_7_cast_fp16")];
tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_50_perm_0 = const()[name = tensor<string, []>("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_51_perm_0 = const()[name = tensor<string, []>("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_51 = transpose(perm = transpose_51_perm_0, x = var_233_cast_fp16)[name = tensor<string, []>("transpose_165")];
tensor<fp16, [1, 4, 128, 128]> transpose_50 = transpose(perm = transpose_50_perm_0, x = q_h_7_cast_fp16)[name = tensor<string, []>("transpose_166")];
tensor<fp16, [1, 4, 128, 512]> scores_5_cast_fp16 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_50, y = transpose_51)[name = tensor<string, []>("scores_5_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_7_cast_fp16 = select(a = var_8_to_fp16, b = scores_5_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_7_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_246_cast_fp16 = softmax(axis = var_20, x = scores_7_cast_fp16)[name = tensor<string, []>("op_246_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_49_cast_fp16 = select(a = var_9_to_fp16, b = var_246_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
tensor<bool, []> x_27_transpose_x_0 = const()[name = tensor<string, []>("x_27_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_27_transpose_y_0 = const()[name = tensor<string, []>("x_27_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_3_cast_fp16 = transpose(perm = value_3_perm_0, x = var_236_cast_fp16)[name = tensor<string, []>("transpose_167")];
tensor<fp16, [1, 4, 128, 128]> x_27_cast_fp16 = matmul(transpose_x = x_27_transpose_x_0, transpose_y = x_27_transpose_y_0, x = input_49_cast_fp16, y = value_3_cast_fp16)[name = tensor<string, []>("x_27_cast_fp16")];
tensor<int32, [4]> var_250_perm_0 = const()[name = tensor<string, []>("op_250_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_252 = const()[name = tensor<string, []>("op_252"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_250_cast_fp16 = transpose(perm = var_250_perm_0, x = x_27_cast_fp16)[name = tensor<string, []>("transpose_164")];
tensor<fp16, [1, 128, 512]> input_51_cast_fp16 = reshape(shape = var_252, x = var_250_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_1_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12128512)))];
tensor<fp16, [512]> d_decoders_1_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_1_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12652864)))];
tensor<fp16, [1, 128, 512]> linear_9_cast_fp16 = linear(bias = d_decoders_1_src_attn_linear_out_bias_to_fp16, weight = d_decoders_1_src_attn_linear_out_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_55_cast_fp16 = add(x = input_47_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_2_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12653952)))];
tensor<fp16, [512]> d_decoders_2_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12655040)))];
tensor<fp16, [1, 128, 512]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = d_decoders_2_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_norm1_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_2_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12656128)))];
tensor<fp16, [2048]> d_decoders_2_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14753344)))];
tensor<fp16, [1, 128, 2048]> linear_10_cast_fp16 = linear(bias = d_decoders_2_feed_forward_w_1_bias_to_fp16, weight = d_decoders_2_feed_forward_w_1_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_61_cast_fp16 = relu(x = linear_10_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
tensor<int32, [1]> input_65_axes_0 = const()[name = tensor<string, []>("input_65_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_2_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14757504)))];
tensor<fp16, [2048]> d_decoders_2_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14761664)))];
tensor<fp16, [1, 128, 2048]> input_65_cast_fp16 = layer_norm(axes = input_65_axes_0, beta = d_decoders_2_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_feed_forward_norm_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_2_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14765824)))];
tensor<fp16, [1, 128, 512]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_2_feed_forward_w_2_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
tensor<int32, [1]> inputs_9_axes_0 = const()[name = tensor<string, []>("inputs_9_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_2_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16863040)))];
tensor<fp16, [512]> d_decoders_2_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16864128)))];
tensor<fp16, [1, 128, 512]> inputs_9_cast_fp16 = layer_norm(axes = inputs_9_axes_0, beta = d_decoders_2_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_norm2_weight_to_fp16, x = linear_11_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_11_cast_fp16 = mul(x = inputs_9_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
tensor<int32, [3]> x_29_perm_0 = const()[name = tensor<string, []>("x_29_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_69_mode_0 = const()[name = tensor<string, []>("input_69_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_17_to_fp16 = const()[name = tensor<string, []>("const_17_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_29_cast_fp16 = transpose(perm = x_29_perm_0, x = inputs_11_cast_fp16)[name = tensor<string, []>("transpose_163")];
tensor<fp16, [1, 512, 138]> input_69_cast_fp16 = pad(constant_val = const_17_to_fp16, mode = input_69_mode_0, pad = input_69_pad_0, x = x_29_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
tensor<string, []> x_31_pad_type_0 = const()[name = tensor<string, []>("x_31_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_31_groups_0 = const()[name = tensor<string, []>("x_31_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_31_strides_0 = const()[name = tensor<string, []>("x_31_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_31_pad_0 = const()[name = tensor<string, []>("x_31_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_31_dilations_0 = const()[name = tensor<string, []>("x_31_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_2_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16865216)))];
tensor<fp16, [1, 512, 128]> x_31_cast_fp16 = conv(dilations = x_31_dilations_0, groups = x_31_groups_0, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = x_31_strides_0, weight = d_decoders_2_self_attn_fsmn_block_weight_to_fp16, x = input_69_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
tensor<int32, [3]> x_33_perm_0 = const()[name = tensor<string, []>("x_33_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_33_cast_fp16 = transpose(perm = x_33_perm_0, x = x_31_cast_fp16)[name = tensor<string, []>("transpose_162")];
tensor<fp16, [1, 128, 512]> input_71_cast_fp16 = add(x = x_33_cast_fp16, y = inputs_11_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_73_cast_fp16 = mul(x = input_71_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_75_cast_fp16 = add(x = input_55_cast_fp16, y = input_73_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
tensor<int32, [1]> x_39_axes_0 = const()[name = tensor<string, []>("x_39_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_2_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16876544)))];
tensor<fp16, [512]> d_decoders_2_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16877632)))];
tensor<fp16, [1, 128, 512]> x_39_cast_fp16 = layer_norm(axes = x_39_axes_0, beta = d_decoders_2_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_2_norm3_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_2_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16878720)))];
tensor<fp16, [512]> d_decoders_2_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17403072)))];
tensor<fp16, [1, 128, 512]> linear_12_cast_fp16 = linear(bias = d_decoders_2_src_attn_linear_q_bias_to_fp16, weight = d_decoders_2_src_attn_linear_q_weight_to_fp16, x = x_39_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
tensor<int32, [4]> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_319_cast_fp16 = reshape(shape = var_318, x = linear_12_cast_fp16)[name = tensor<string, []>("op_319_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_2_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17404160)))];
tensor<fp16, [1024]> d_decoders_2_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18452800)))];
tensor<fp16, [1, 512, 1024]> linear_13_cast_fp16 = linear(bias = d_decoders_2_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_2_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
tensor<int32, [2]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_324_axis_0 = const()[name = tensor<string, []>("op_324_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_324_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_324_cast_fp16_1 = split(axis = var_324_axis_0, split_sizes = tile_2, x = linear_13_cast_fp16)[name = tensor<string, []>("op_324_cast_fp16")];
tensor<int32, [4]> var_327 = const()[name = tensor<string, []>("op_327"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_328_cast_fp16 = reshape(shape = var_327, x = var_324_cast_fp16_0)[name = tensor<string, []>("op_328_cast_fp16")];
tensor<int32, [4]> var_330 = const()[name = tensor<string, []>("op_330"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_331_cast_fp16 = reshape(shape = var_330, x = var_324_cast_fp16_1)[name = tensor<string, []>("op_331_cast_fp16")];
tensor<int32, [4]> value_5_perm_0 = const()[name = tensor<string, []>("value_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_333_to_fp16 = const()[name = tensor<string, []>("op_333_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_11_cast_fp16 = mul(x = var_319_cast_fp16, y = var_333_to_fp16)[name = tensor<string, []>("q_h_11_cast_fp16")];
tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_52_perm_0 = const()[name = tensor<string, []>("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_53_perm_0 = const()[name = tensor<string, []>("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_53 = transpose(perm = transpose_53_perm_0, x = var_328_cast_fp16)[name = tensor<string, []>("transpose_159")];
tensor<fp16, [1, 4, 128, 128]> transpose_52 = transpose(perm = transpose_52_perm_0, x = q_h_11_cast_fp16)[name = tensor<string, []>("transpose_160")];
tensor<fp16, [1, 4, 128, 512]> scores_9_cast_fp16 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_52, y = transpose_53)[name = tensor<string, []>("scores_9_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_11_cast_fp16 = select(a = var_8_to_fp16, b = scores_9_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_11_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_341_cast_fp16 = softmax(axis = var_20, x = scores_11_cast_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_77_cast_fp16 = select(a = var_9_to_fp16, b = var_341_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
tensor<bool, []> x_41_transpose_x_0 = const()[name = tensor<string, []>("x_41_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_41_transpose_y_0 = const()[name = tensor<string, []>("x_41_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_5_cast_fp16 = transpose(perm = value_5_perm_0, x = var_331_cast_fp16)[name = tensor<string, []>("transpose_161")];
tensor<fp16, [1, 4, 128, 128]> x_41_cast_fp16 = matmul(transpose_x = x_41_transpose_x_0, transpose_y = x_41_transpose_y_0, x = input_77_cast_fp16, y = value_5_cast_fp16)[name = tensor<string, []>("x_41_cast_fp16")];
tensor<int32, [4]> var_345_perm_0 = const()[name = tensor<string, []>("op_345_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_347 = const()[name = tensor<string, []>("op_347"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_345_cast_fp16 = transpose(perm = var_345_perm_0, x = x_41_cast_fp16)[name = tensor<string, []>("transpose_158")];
tensor<fp16, [1, 128, 512]> input_79_cast_fp16 = reshape(shape = var_347, x = var_345_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_2_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18454912)))];
tensor<fp16, [512]> d_decoders_2_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_2_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18979264)))];
tensor<fp16, [1, 128, 512]> linear_14_cast_fp16 = linear(bias = d_decoders_2_src_attn_linear_out_bias_to_fp16, weight = d_decoders_2_src_attn_linear_out_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_83_cast_fp16 = add(x = input_75_cast_fp16, y = linear_14_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
tensor<int32, [1]> input_85_axes_0 = const()[name = tensor<string, []>("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_3_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18980352)))];
tensor<fp16, [512]> d_decoders_3_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18981440)))];
tensor<fp16, [1, 128, 512]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = d_decoders_3_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_norm1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_3_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18982528)))];
tensor<fp16, [2048]> d_decoders_3_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21079744)))];
tensor<fp16, [1, 128, 2048]> linear_15_cast_fp16 = linear(bias = d_decoders_3_feed_forward_w_1_bias_to_fp16, weight = d_decoders_3_feed_forward_w_1_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_89_cast_fp16 = relu(x = linear_15_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_3_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21083904)))];
tensor<fp16, [2048]> d_decoders_3_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21088064)))];
tensor<fp16, [1, 128, 2048]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = d_decoders_3_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_feed_forward_norm_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_3_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21092224)))];
tensor<fp16, [1, 128, 512]> linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_3_feed_forward_w_2_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
tensor<int32, [1]> inputs_13_axes_0 = const()[name = tensor<string, []>("inputs_13_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_3_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23189440)))];
tensor<fp16, [512]> d_decoders_3_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23190528)))];
tensor<fp16, [1, 128, 512]> inputs_13_cast_fp16 = layer_norm(axes = inputs_13_axes_0, beta = d_decoders_3_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_norm2_weight_to_fp16, x = linear_16_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_15_cast_fp16 = mul(x = inputs_13_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
tensor<int32, [3]> x_43_perm_0 = const()[name = tensor<string, []>("x_43_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_97_pad_0 = const()[name = tensor<string, []>("input_97_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_97_mode_0 = const()[name = tensor<string, []>("input_97_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_23_to_fp16 = const()[name = tensor<string, []>("const_23_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_43_cast_fp16 = transpose(perm = x_43_perm_0, x = inputs_15_cast_fp16)[name = tensor<string, []>("transpose_157")];
tensor<fp16, [1, 512, 138]> input_97_cast_fp16 = pad(constant_val = const_23_to_fp16, mode = input_97_mode_0, pad = input_97_pad_0, x = x_43_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
tensor<string, []> x_45_pad_type_0 = const()[name = tensor<string, []>("x_45_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_45_groups_0 = const()[name = tensor<string, []>("x_45_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_45_strides_0 = const()[name = tensor<string, []>("x_45_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_45_pad_0 = const()[name = tensor<string, []>("x_45_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_45_dilations_0 = const()[name = tensor<string, []>("x_45_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_3_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23191616)))];
tensor<fp16, [1, 512, 128]> x_45_cast_fp16 = conv(dilations = x_45_dilations_0, groups = x_45_groups_0, pad = x_45_pad_0, pad_type = x_45_pad_type_0, strides = x_45_strides_0, weight = d_decoders_3_self_attn_fsmn_block_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("x_45_cast_fp16")];
tensor<int32, [3]> x_47_perm_0 = const()[name = tensor<string, []>("x_47_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_47_cast_fp16 = transpose(perm = x_47_perm_0, x = x_45_cast_fp16)[name = tensor<string, []>("transpose_156")];
tensor<fp16, [1, 128, 512]> input_99_cast_fp16 = add(x = x_47_cast_fp16, y = inputs_15_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_101_cast_fp16 = mul(x = input_99_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_103_cast_fp16 = add(x = input_83_cast_fp16, y = input_101_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
tensor<int32, [1]> x_53_axes_0 = const()[name = tensor<string, []>("x_53_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_3_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23202944)))];
tensor<fp16, [512]> d_decoders_3_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23204032)))];
tensor<fp16, [1, 128, 512]> x_53_cast_fp16 = layer_norm(axes = x_53_axes_0, beta = d_decoders_3_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_3_norm3_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("x_53_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_3_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23205120)))];
tensor<fp16, [512]> d_decoders_3_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23729472)))];
tensor<fp16, [1, 128, 512]> linear_17_cast_fp16 = linear(bias = d_decoders_3_src_attn_linear_q_bias_to_fp16, weight = d_decoders_3_src_attn_linear_q_weight_to_fp16, x = x_53_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
tensor<int32, [4]> var_413 = const()[name = tensor<string, []>("op_413"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_414_cast_fp16 = reshape(shape = var_413, x = linear_17_cast_fp16)[name = tensor<string, []>("op_414_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_3_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23730560)))];
tensor<fp16, [1024]> d_decoders_3_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24779200)))];
tensor<fp16, [1, 512, 1024]> linear_18_cast_fp16 = linear(bias = d_decoders_3_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_3_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
tensor<int32, [2]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_419_axis_0 = const()[name = tensor<string, []>("op_419_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_419_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_419_cast_fp16_1 = split(axis = var_419_axis_0, split_sizes = tile_3, x = linear_18_cast_fp16)[name = tensor<string, []>("op_419_cast_fp16")];
tensor<int32, [4]> var_422 = const()[name = tensor<string, []>("op_422"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_423_cast_fp16 = reshape(shape = var_422, x = var_419_cast_fp16_0)[name = tensor<string, []>("op_423_cast_fp16")];
tensor<int32, [4]> var_425 = const()[name = tensor<string, []>("op_425"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_426_cast_fp16 = reshape(shape = var_425, x = var_419_cast_fp16_1)[name = tensor<string, []>("op_426_cast_fp16")];
tensor<int32, [4]> value_7_perm_0 = const()[name = tensor<string, []>("value_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_428_to_fp16 = const()[name = tensor<string, []>("op_428_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_15_cast_fp16 = mul(x = var_414_cast_fp16, y = var_428_to_fp16)[name = tensor<string, []>("q_h_15_cast_fp16")];
tensor<bool, []> scores_13_transpose_x_0 = const()[name = tensor<string, []>("scores_13_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_13_transpose_y_0 = const()[name = tensor<string, []>("scores_13_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_54_perm_0 = const()[name = tensor<string, []>("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_55_perm_0 = const()[name = tensor<string, []>("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_55 = transpose(perm = transpose_55_perm_0, x = var_423_cast_fp16)[name = tensor<string, []>("transpose_153")];
tensor<fp16, [1, 4, 128, 128]> transpose_54 = transpose(perm = transpose_54_perm_0, x = q_h_15_cast_fp16)[name = tensor<string, []>("transpose_154")];
tensor<fp16, [1, 4, 128, 512]> scores_13_cast_fp16 = matmul(transpose_x = scores_13_transpose_x_0, transpose_y = scores_13_transpose_y_0, x = transpose_54, y = transpose_55)[name = tensor<string, []>("scores_13_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_15_cast_fp16 = select(a = var_8_to_fp16, b = scores_13_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_15_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_436_cast_fp16 = softmax(axis = var_20, x = scores_15_cast_fp16)[name = tensor<string, []>("op_436_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_105_cast_fp16 = select(a = var_9_to_fp16, b = var_436_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
tensor<bool, []> x_55_transpose_x_0 = const()[name = tensor<string, []>("x_55_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_55_transpose_y_0 = const()[name = tensor<string, []>("x_55_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_7_cast_fp16 = transpose(perm = value_7_perm_0, x = var_426_cast_fp16)[name = tensor<string, []>("transpose_155")];
tensor<fp16, [1, 4, 128, 128]> x_55_cast_fp16 = matmul(transpose_x = x_55_transpose_x_0, transpose_y = x_55_transpose_y_0, x = input_105_cast_fp16, y = value_7_cast_fp16)[name = tensor<string, []>("x_55_cast_fp16")];
tensor<int32, [4]> var_440_perm_0 = const()[name = tensor<string, []>("op_440_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_442 = const()[name = tensor<string, []>("op_442"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_440_cast_fp16 = transpose(perm = var_440_perm_0, x = x_55_cast_fp16)[name = tensor<string, []>("transpose_152")];
tensor<fp16, [1, 128, 512]> input_107_cast_fp16 = reshape(shape = var_442, x = var_440_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_3_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24781312)))];
tensor<fp16, [512]> d_decoders_3_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_3_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25305664)))];
tensor<fp16, [1, 128, 512]> linear_19_cast_fp16 = linear(bias = d_decoders_3_src_attn_linear_out_bias_to_fp16, weight = d_decoders_3_src_attn_linear_out_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_111_cast_fp16 = add(x = input_103_cast_fp16, y = linear_19_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_4_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25306752)))];
tensor<fp16, [512]> d_decoders_4_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25307840)))];
tensor<fp16, [1, 128, 512]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = d_decoders_4_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_norm1_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_4_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25308928)))];
tensor<fp16, [2048]> d_decoders_4_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27406144)))];
tensor<fp16, [1, 128, 2048]> linear_20_cast_fp16 = linear(bias = d_decoders_4_feed_forward_w_1_bias_to_fp16, weight = d_decoders_4_feed_forward_w_1_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_117_cast_fp16 = relu(x = linear_20_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
tensor<int32, [1]> input_121_axes_0 = const()[name = tensor<string, []>("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_4_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27410304)))];
tensor<fp16, [2048]> d_decoders_4_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27414464)))];
tensor<fp16, [1, 128, 2048]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = d_decoders_4_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_feed_forward_norm_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_4_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27418624)))];
tensor<fp16, [1, 128, 512]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_4_feed_forward_w_2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
tensor<int32, [1]> inputs_17_axes_0 = const()[name = tensor<string, []>("inputs_17_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_4_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29515840)))];
tensor<fp16, [512]> d_decoders_4_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29516928)))];
tensor<fp16, [1, 128, 512]> inputs_17_cast_fp16 = layer_norm(axes = inputs_17_axes_0, beta = d_decoders_4_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_norm2_weight_to_fp16, x = linear_21_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_19_cast_fp16 = mul(x = inputs_17_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
tensor<int32, [3]> x_57_perm_0 = const()[name = tensor<string, []>("x_57_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_125_pad_0 = const()[name = tensor<string, []>("input_125_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_125_mode_0 = const()[name = tensor<string, []>("input_125_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_29_to_fp16 = const()[name = tensor<string, []>("const_29_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_57_cast_fp16 = transpose(perm = x_57_perm_0, x = inputs_19_cast_fp16)[name = tensor<string, []>("transpose_151")];
tensor<fp16, [1, 512, 138]> input_125_cast_fp16 = pad(constant_val = const_29_to_fp16, mode = input_125_mode_0, pad = input_125_pad_0, x = x_57_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")];
tensor<string, []> x_59_pad_type_0 = const()[name = tensor<string, []>("x_59_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_59_groups_0 = const()[name = tensor<string, []>("x_59_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_59_strides_0 = const()[name = tensor<string, []>("x_59_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_59_pad_0 = const()[name = tensor<string, []>("x_59_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_59_dilations_0 = const()[name = tensor<string, []>("x_59_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_4_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29518016)))];
tensor<fp16, [1, 512, 128]> x_59_cast_fp16 = conv(dilations = x_59_dilations_0, groups = x_59_groups_0, pad = x_59_pad_0, pad_type = x_59_pad_type_0, strides = x_59_strides_0, weight = d_decoders_4_self_attn_fsmn_block_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("x_59_cast_fp16")];
tensor<int32, [3]> x_61_perm_0 = const()[name = tensor<string, []>("x_61_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_61_cast_fp16 = transpose(perm = x_61_perm_0, x = x_59_cast_fp16)[name = tensor<string, []>("transpose_150")];
tensor<fp16, [1, 128, 512]> input_127_cast_fp16 = add(x = x_61_cast_fp16, y = inputs_19_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_129_cast_fp16 = mul(x = input_127_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_131_cast_fp16 = add(x = input_111_cast_fp16, y = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
tensor<int32, [1]> x_67_axes_0 = const()[name = tensor<string, []>("x_67_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_4_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29529344)))];
tensor<fp16, [512]> d_decoders_4_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29530432)))];
tensor<fp16, [1, 128, 512]> x_67_cast_fp16 = layer_norm(axes = x_67_axes_0, beta = d_decoders_4_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_4_norm3_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("x_67_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_4_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29531520)))];
tensor<fp16, [512]> d_decoders_4_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30055872)))];
tensor<fp16, [1, 128, 512]> linear_22_cast_fp16 = linear(bias = d_decoders_4_src_attn_linear_q_bias_to_fp16, weight = d_decoders_4_src_attn_linear_q_weight_to_fp16, x = x_67_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
tensor<int32, [4]> var_508 = const()[name = tensor<string, []>("op_508"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_509_cast_fp16 = reshape(shape = var_508, x = linear_22_cast_fp16)[name = tensor<string, []>("op_509_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_4_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30056960)))];
tensor<fp16, [1024]> d_decoders_4_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31105600)))];
tensor<fp16, [1, 512, 1024]> linear_23_cast_fp16 = linear(bias = d_decoders_4_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_4_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
tensor<int32, [2]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_514_axis_0 = const()[name = tensor<string, []>("op_514_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_514_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_514_cast_fp16_1 = split(axis = var_514_axis_0, split_sizes = tile_4, x = linear_23_cast_fp16)[name = tensor<string, []>("op_514_cast_fp16")];
tensor<int32, [4]> var_517 = const()[name = tensor<string, []>("op_517"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_518_cast_fp16 = reshape(shape = var_517, x = var_514_cast_fp16_0)[name = tensor<string, []>("op_518_cast_fp16")];
tensor<int32, [4]> var_520 = const()[name = tensor<string, []>("op_520"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_521_cast_fp16 = reshape(shape = var_520, x = var_514_cast_fp16_1)[name = tensor<string, []>("op_521_cast_fp16")];
tensor<int32, [4]> value_9_perm_0 = const()[name = tensor<string, []>("value_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_523_to_fp16 = const()[name = tensor<string, []>("op_523_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_19_cast_fp16 = mul(x = var_509_cast_fp16, y = var_523_to_fp16)[name = tensor<string, []>("q_h_19_cast_fp16")];
tensor<bool, []> scores_17_transpose_x_0 = const()[name = tensor<string, []>("scores_17_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_17_transpose_y_0 = const()[name = tensor<string, []>("scores_17_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_56_perm_0 = const()[name = tensor<string, []>("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_57_perm_0 = const()[name = tensor<string, []>("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_57 = transpose(perm = transpose_57_perm_0, x = var_518_cast_fp16)[name = tensor<string, []>("transpose_147")];
tensor<fp16, [1, 4, 128, 128]> transpose_56 = transpose(perm = transpose_56_perm_0, x = q_h_19_cast_fp16)[name = tensor<string, []>("transpose_148")];
tensor<fp16, [1, 4, 128, 512]> scores_17_cast_fp16 = matmul(transpose_x = scores_17_transpose_x_0, transpose_y = scores_17_transpose_y_0, x = transpose_56, y = transpose_57)[name = tensor<string, []>("scores_17_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_19_cast_fp16 = select(a = var_8_to_fp16, b = scores_17_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_19_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_531_cast_fp16 = softmax(axis = var_20, x = scores_19_cast_fp16)[name = tensor<string, []>("op_531_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_133_cast_fp16 = select(a = var_9_to_fp16, b = var_531_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
tensor<bool, []> x_69_transpose_x_0 = const()[name = tensor<string, []>("x_69_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_69_transpose_y_0 = const()[name = tensor<string, []>("x_69_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_9_cast_fp16 = transpose(perm = value_9_perm_0, x = var_521_cast_fp16)[name = tensor<string, []>("transpose_149")];
tensor<fp16, [1, 4, 128, 128]> x_69_cast_fp16 = matmul(transpose_x = x_69_transpose_x_0, transpose_y = x_69_transpose_y_0, x = input_133_cast_fp16, y = value_9_cast_fp16)[name = tensor<string, []>("x_69_cast_fp16")];
tensor<int32, [4]> var_535_perm_0 = const()[name = tensor<string, []>("op_535_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_537 = const()[name = tensor<string, []>("op_537"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_535_cast_fp16 = transpose(perm = var_535_perm_0, x = x_69_cast_fp16)[name = tensor<string, []>("transpose_146")];
tensor<fp16, [1, 128, 512]> input_135_cast_fp16 = reshape(shape = var_537, x = var_535_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_4_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31107712)))];
tensor<fp16, [512]> d_decoders_4_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_4_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31632064)))];
tensor<fp16, [1, 128, 512]> linear_24_cast_fp16 = linear(bias = d_decoders_4_src_attn_linear_out_bias_to_fp16, weight = d_decoders_4_src_attn_linear_out_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_139_cast_fp16 = add(x = input_131_cast_fp16, y = linear_24_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
tensor<int32, [1]> input_141_axes_0 = const()[name = tensor<string, []>("input_141_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_5_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31633152)))];
tensor<fp16, [512]> d_decoders_5_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31634240)))];
tensor<fp16, [1, 128, 512]> input_141_cast_fp16 = layer_norm(axes = input_141_axes_0, beta = d_decoders_5_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_norm1_weight_to_fp16, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_5_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31635328)))];
tensor<fp16, [2048]> d_decoders_5_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33732544)))];
tensor<fp16, [1, 128, 2048]> linear_25_cast_fp16 = linear(bias = d_decoders_5_feed_forward_w_1_bias_to_fp16, weight = d_decoders_5_feed_forward_w_1_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_145_cast_fp16 = relu(x = linear_25_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
tensor<int32, [1]> input_149_axes_0 = const()[name = tensor<string, []>("input_149_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_5_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33736704)))];
tensor<fp16, [2048]> d_decoders_5_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33740864)))];
tensor<fp16, [1, 128, 2048]> input_149_cast_fp16 = layer_norm(axes = input_149_axes_0, beta = d_decoders_5_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_feed_forward_norm_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_5_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33745024)))];
tensor<fp16, [1, 128, 512]> linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_5_feed_forward_w_2_weight_to_fp16, x = input_149_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
tensor<int32, [1]> inputs_21_axes_0 = const()[name = tensor<string, []>("inputs_21_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_5_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35842240)))];
tensor<fp16, [512]> d_decoders_5_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35843328)))];
tensor<fp16, [1, 128, 512]> inputs_21_cast_fp16 = layer_norm(axes = inputs_21_axes_0, beta = d_decoders_5_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_norm2_weight_to_fp16, x = linear_26_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_23_cast_fp16 = mul(x = inputs_21_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
tensor<int32, [3]> x_71_perm_0 = const()[name = tensor<string, []>("x_71_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_153_pad_0 = const()[name = tensor<string, []>("input_153_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_153_mode_0 = const()[name = tensor<string, []>("input_153_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_35_to_fp16 = const()[name = tensor<string, []>("const_35_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_71_cast_fp16 = transpose(perm = x_71_perm_0, x = inputs_23_cast_fp16)[name = tensor<string, []>("transpose_145")];
tensor<fp16, [1, 512, 138]> input_153_cast_fp16 = pad(constant_val = const_35_to_fp16, mode = input_153_mode_0, pad = input_153_pad_0, x = x_71_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
tensor<string, []> x_73_pad_type_0 = const()[name = tensor<string, []>("x_73_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_73_groups_0 = const()[name = tensor<string, []>("x_73_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_73_strides_0 = const()[name = tensor<string, []>("x_73_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_73_pad_0 = const()[name = tensor<string, []>("x_73_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_73_dilations_0 = const()[name = tensor<string, []>("x_73_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_5_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35844416)))];
tensor<fp16, [1, 512, 128]> x_73_cast_fp16 = conv(dilations = x_73_dilations_0, groups = x_73_groups_0, pad = x_73_pad_0, pad_type = x_73_pad_type_0, strides = x_73_strides_0, weight = d_decoders_5_self_attn_fsmn_block_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("x_73_cast_fp16")];
tensor<int32, [3]> x_75_perm_0 = const()[name = tensor<string, []>("x_75_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_75_cast_fp16 = transpose(perm = x_75_perm_0, x = x_73_cast_fp16)[name = tensor<string, []>("transpose_144")];
tensor<fp16, [1, 128, 512]> input_155_cast_fp16 = add(x = x_75_cast_fp16, y = inputs_23_cast_fp16)[name = tensor<string, []>("input_155_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_157_cast_fp16 = mul(x = input_155_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_159_cast_fp16 = add(x = input_139_cast_fp16, y = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
tensor<int32, [1]> x_81_axes_0 = const()[name = tensor<string, []>("x_81_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_5_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35855744)))];
tensor<fp16, [512]> d_decoders_5_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35856832)))];
tensor<fp16, [1, 128, 512]> x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, beta = d_decoders_5_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_5_norm3_weight_to_fp16, x = input_159_cast_fp16)[name = tensor<string, []>("x_81_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_5_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35857920)))];
tensor<fp16, [512]> d_decoders_5_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36382272)))];
tensor<fp16, [1, 128, 512]> linear_27_cast_fp16 = linear(bias = d_decoders_5_src_attn_linear_q_bias_to_fp16, weight = d_decoders_5_src_attn_linear_q_weight_to_fp16, x = x_81_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
tensor<int32, [4]> var_603 = const()[name = tensor<string, []>("op_603"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_604_cast_fp16 = reshape(shape = var_603, x = linear_27_cast_fp16)[name = tensor<string, []>("op_604_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_5_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36383360)))];
tensor<fp16, [1024]> d_decoders_5_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37432000)))];
tensor<fp16, [1, 512, 1024]> linear_28_cast_fp16 = linear(bias = d_decoders_5_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_5_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
tensor<int32, [2]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_609_axis_0 = const()[name = tensor<string, []>("op_609_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_609_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_609_cast_fp16_1 = split(axis = var_609_axis_0, split_sizes = tile_5, x = linear_28_cast_fp16)[name = tensor<string, []>("op_609_cast_fp16")];
tensor<int32, [4]> var_612 = const()[name = tensor<string, []>("op_612"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_613_cast_fp16 = reshape(shape = var_612, x = var_609_cast_fp16_0)[name = tensor<string, []>("op_613_cast_fp16")];
tensor<int32, [4]> var_615 = const()[name = tensor<string, []>("op_615"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_616_cast_fp16 = reshape(shape = var_615, x = var_609_cast_fp16_1)[name = tensor<string, []>("op_616_cast_fp16")];
tensor<int32, [4]> value_11_perm_0 = const()[name = tensor<string, []>("value_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_618_to_fp16 = const()[name = tensor<string, []>("op_618_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_23_cast_fp16 = mul(x = var_604_cast_fp16, y = var_618_to_fp16)[name = tensor<string, []>("q_h_23_cast_fp16")];
tensor<bool, []> scores_21_transpose_x_0 = const()[name = tensor<string, []>("scores_21_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_21_transpose_y_0 = const()[name = tensor<string, []>("scores_21_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_58_perm_0 = const()[name = tensor<string, []>("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_59_perm_0 = const()[name = tensor<string, []>("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_59 = transpose(perm = transpose_59_perm_0, x = var_613_cast_fp16)[name = tensor<string, []>("transpose_141")];
tensor<fp16, [1, 4, 128, 128]> transpose_58 = transpose(perm = transpose_58_perm_0, x = q_h_23_cast_fp16)[name = tensor<string, []>("transpose_142")];
tensor<fp16, [1, 4, 128, 512]> scores_21_cast_fp16 = matmul(transpose_x = scores_21_transpose_x_0, transpose_y = scores_21_transpose_y_0, x = transpose_58, y = transpose_59)[name = tensor<string, []>("scores_21_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_23_cast_fp16 = select(a = var_8_to_fp16, b = scores_21_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_23_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_626_cast_fp16 = softmax(axis = var_20, x = scores_23_cast_fp16)[name = tensor<string, []>("op_626_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_161_cast_fp16 = select(a = var_9_to_fp16, b = var_626_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
tensor<bool, []> x_83_transpose_x_0 = const()[name = tensor<string, []>("x_83_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_83_transpose_y_0 = const()[name = tensor<string, []>("x_83_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_11_cast_fp16 = transpose(perm = value_11_perm_0, x = var_616_cast_fp16)[name = tensor<string, []>("transpose_143")];
tensor<fp16, [1, 4, 128, 128]> x_83_cast_fp16 = matmul(transpose_x = x_83_transpose_x_0, transpose_y = x_83_transpose_y_0, x = input_161_cast_fp16, y = value_11_cast_fp16)[name = tensor<string, []>("x_83_cast_fp16")];
tensor<int32, [4]> var_630_perm_0 = const()[name = tensor<string, []>("op_630_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_632 = const()[name = tensor<string, []>("op_632"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_630_cast_fp16 = transpose(perm = var_630_perm_0, x = x_83_cast_fp16)[name = tensor<string, []>("transpose_140")];
tensor<fp16, [1, 128, 512]> input_163_cast_fp16 = reshape(shape = var_632, x = var_630_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_5_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37434112)))];
tensor<fp16, [512]> d_decoders_5_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_5_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37958464)))];
tensor<fp16, [1, 128, 512]> linear_29_cast_fp16 = linear(bias = d_decoders_5_src_attn_linear_out_bias_to_fp16, weight = d_decoders_5_src_attn_linear_out_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_167_cast_fp16 = add(x = input_159_cast_fp16, y = linear_29_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
tensor<int32, [1]> input_169_axes_0 = const()[name = tensor<string, []>("input_169_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_6_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37959552)))];
tensor<fp16, [512]> d_decoders_6_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37960640)))];
tensor<fp16, [1, 128, 512]> input_169_cast_fp16 = layer_norm(axes = input_169_axes_0, beta = d_decoders_6_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_norm1_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_6_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37961728)))];
tensor<fp16, [2048]> d_decoders_6_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40058944)))];
tensor<fp16, [1, 128, 2048]> linear_30_cast_fp16 = linear(bias = d_decoders_6_feed_forward_w_1_bias_to_fp16, weight = d_decoders_6_feed_forward_w_1_weight_to_fp16, x = input_169_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_173_cast_fp16 = relu(x = linear_30_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_6_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40063104)))];
tensor<fp16, [2048]> d_decoders_6_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40067264)))];
tensor<fp16, [1, 128, 2048]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = d_decoders_6_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_feed_forward_norm_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_6_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40071424)))];
tensor<fp16, [1, 128, 512]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_6_feed_forward_w_2_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
tensor<int32, [1]> inputs_25_axes_0 = const()[name = tensor<string, []>("inputs_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_6_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42168640)))];
tensor<fp16, [512]> d_decoders_6_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42169728)))];
tensor<fp16, [1, 128, 512]> inputs_25_cast_fp16 = layer_norm(axes = inputs_25_axes_0, beta = d_decoders_6_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_norm2_weight_to_fp16, x = linear_31_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_27_cast_fp16 = mul(x = inputs_25_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
tensor<int32, [3]> x_85_perm_0 = const()[name = tensor<string, []>("x_85_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_181_pad_0 = const()[name = tensor<string, []>("input_181_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_41_to_fp16 = const()[name = tensor<string, []>("const_41_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_85_cast_fp16 = transpose(perm = x_85_perm_0, x = inputs_27_cast_fp16)[name = tensor<string, []>("transpose_139")];
tensor<fp16, [1, 512, 138]> input_181_cast_fp16 = pad(constant_val = const_41_to_fp16, mode = input_181_mode_0, pad = input_181_pad_0, x = x_85_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
tensor<string, []> x_87_pad_type_0 = const()[name = tensor<string, []>("x_87_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_87_groups_0 = const()[name = tensor<string, []>("x_87_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_87_strides_0 = const()[name = tensor<string, []>("x_87_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_87_pad_0 = const()[name = tensor<string, []>("x_87_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_87_dilations_0 = const()[name = tensor<string, []>("x_87_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_6_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42170816)))];
tensor<fp16, [1, 512, 128]> x_87_cast_fp16 = conv(dilations = x_87_dilations_0, groups = x_87_groups_0, pad = x_87_pad_0, pad_type = x_87_pad_type_0, strides = x_87_strides_0, weight = d_decoders_6_self_attn_fsmn_block_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("x_87_cast_fp16")];
tensor<int32, [3]> x_89_perm_0 = const()[name = tensor<string, []>("x_89_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_89_cast_fp16 = transpose(perm = x_89_perm_0, x = x_87_cast_fp16)[name = tensor<string, []>("transpose_138")];
tensor<fp16, [1, 128, 512]> input_183_cast_fp16 = add(x = x_89_cast_fp16, y = inputs_27_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_185_cast_fp16 = mul(x = input_183_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_185_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_187_cast_fp16 = add(x = input_167_cast_fp16, y = input_185_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
tensor<int32, [1]> x_95_axes_0 = const()[name = tensor<string, []>("x_95_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_6_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42182144)))];
tensor<fp16, [512]> d_decoders_6_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42183232)))];
tensor<fp16, [1, 128, 512]> x_95_cast_fp16 = layer_norm(axes = x_95_axes_0, beta = d_decoders_6_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_6_norm3_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("x_95_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_6_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42184320)))];
tensor<fp16, [512]> d_decoders_6_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42708672)))];
tensor<fp16, [1, 128, 512]> linear_32_cast_fp16 = linear(bias = d_decoders_6_src_attn_linear_q_bias_to_fp16, weight = d_decoders_6_src_attn_linear_q_weight_to_fp16, x = x_95_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
tensor<int32, [4]> var_698 = const()[name = tensor<string, []>("op_698"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_699_cast_fp16 = reshape(shape = var_698, x = linear_32_cast_fp16)[name = tensor<string, []>("op_699_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_6_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42709760)))];
tensor<fp16, [1024]> d_decoders_6_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43758400)))];
tensor<fp16, [1, 512, 1024]> linear_33_cast_fp16 = linear(bias = d_decoders_6_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_6_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
tensor<int32, [2]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_704_axis_0 = const()[name = tensor<string, []>("op_704_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_704_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_704_cast_fp16_1 = split(axis = var_704_axis_0, split_sizes = tile_6, x = linear_33_cast_fp16)[name = tensor<string, []>("op_704_cast_fp16")];
tensor<int32, [4]> var_707 = const()[name = tensor<string, []>("op_707"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_708_cast_fp16 = reshape(shape = var_707, x = var_704_cast_fp16_0)[name = tensor<string, []>("op_708_cast_fp16")];
tensor<int32, [4]> var_710 = const()[name = tensor<string, []>("op_710"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_711_cast_fp16 = reshape(shape = var_710, x = var_704_cast_fp16_1)[name = tensor<string, []>("op_711_cast_fp16")];
tensor<int32, [4]> value_13_perm_0 = const()[name = tensor<string, []>("value_13_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_713_to_fp16 = const()[name = tensor<string, []>("op_713_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_27_cast_fp16 = mul(x = var_699_cast_fp16, y = var_713_to_fp16)[name = tensor<string, []>("q_h_27_cast_fp16")];
tensor<bool, []> scores_25_transpose_x_0 = const()[name = tensor<string, []>("scores_25_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_25_transpose_y_0 = const()[name = tensor<string, []>("scores_25_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_60_perm_0 = const()[name = tensor<string, []>("transpose_60_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_61_perm_0 = const()[name = tensor<string, []>("transpose_61_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_61 = transpose(perm = transpose_61_perm_0, x = var_708_cast_fp16)[name = tensor<string, []>("transpose_135")];
tensor<fp16, [1, 4, 128, 128]> transpose_60 = transpose(perm = transpose_60_perm_0, x = q_h_27_cast_fp16)[name = tensor<string, []>("transpose_136")];
tensor<fp16, [1, 4, 128, 512]> scores_25_cast_fp16 = matmul(transpose_x = scores_25_transpose_x_0, transpose_y = scores_25_transpose_y_0, x = transpose_60, y = transpose_61)[name = tensor<string, []>("scores_25_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_27_cast_fp16 = select(a = var_8_to_fp16, b = scores_25_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_27_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_721_cast_fp16 = softmax(axis = var_20, x = scores_27_cast_fp16)[name = tensor<string, []>("op_721_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_189_cast_fp16 = select(a = var_9_to_fp16, b = var_721_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
tensor<bool, []> x_97_transpose_x_0 = const()[name = tensor<string, []>("x_97_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_97_transpose_y_0 = const()[name = tensor<string, []>("x_97_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_13_cast_fp16 = transpose(perm = value_13_perm_0, x = var_711_cast_fp16)[name = tensor<string, []>("transpose_137")];
tensor<fp16, [1, 4, 128, 128]> x_97_cast_fp16 = matmul(transpose_x = x_97_transpose_x_0, transpose_y = x_97_transpose_y_0, x = input_189_cast_fp16, y = value_13_cast_fp16)[name = tensor<string, []>("x_97_cast_fp16")];
tensor<int32, [4]> var_725_perm_0 = const()[name = tensor<string, []>("op_725_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_727 = const()[name = tensor<string, []>("op_727"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_725_cast_fp16 = transpose(perm = var_725_perm_0, x = x_97_cast_fp16)[name = tensor<string, []>("transpose_134")];
tensor<fp16, [1, 128, 512]> input_191_cast_fp16 = reshape(shape = var_727, x = var_725_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_6_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43760512)))];
tensor<fp16, [512]> d_decoders_6_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_6_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44284864)))];
tensor<fp16, [1, 128, 512]> linear_34_cast_fp16 = linear(bias = d_decoders_6_src_attn_linear_out_bias_to_fp16, weight = d_decoders_6_src_attn_linear_out_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_195_cast_fp16 = add(x = input_187_cast_fp16, y = linear_34_cast_fp16)[name = tensor<string, []>("input_195_cast_fp16")];
tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_7_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44285952)))];
tensor<fp16, [512]> d_decoders_7_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44287040)))];
tensor<fp16, [1, 128, 512]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = d_decoders_7_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_norm1_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_7_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44288128)))];
tensor<fp16, [2048]> d_decoders_7_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46385344)))];
tensor<fp16, [1, 128, 2048]> linear_35_cast_fp16 = linear(bias = d_decoders_7_feed_forward_w_1_bias_to_fp16, weight = d_decoders_7_feed_forward_w_1_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_201_cast_fp16 = relu(x = linear_35_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
tensor<int32, [1]> input_205_axes_0 = const()[name = tensor<string, []>("input_205_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_7_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46389504)))];
tensor<fp16, [2048]> d_decoders_7_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46393664)))];
tensor<fp16, [1, 128, 2048]> input_205_cast_fp16 = layer_norm(axes = input_205_axes_0, beta = d_decoders_7_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_feed_forward_norm_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("input_205_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_7_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46397824)))];
tensor<fp16, [1, 128, 512]> linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_7_feed_forward_w_2_weight_to_fp16, x = input_205_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
tensor<int32, [1]> inputs_29_axes_0 = const()[name = tensor<string, []>("inputs_29_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_7_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48495040)))];
tensor<fp16, [512]> d_decoders_7_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48496128)))];
tensor<fp16, [1, 128, 512]> inputs_29_cast_fp16 = layer_norm(axes = inputs_29_axes_0, beta = d_decoders_7_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_norm2_weight_to_fp16, x = linear_36_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_31_cast_fp16 = mul(x = inputs_29_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
tensor<int32, [3]> x_99_perm_0 = const()[name = tensor<string, []>("x_99_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_209_mode_0 = const()[name = tensor<string, []>("input_209_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_47_to_fp16 = const()[name = tensor<string, []>("const_47_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_99_cast_fp16 = transpose(perm = x_99_perm_0, x = inputs_31_cast_fp16)[name = tensor<string, []>("transpose_133")];
tensor<fp16, [1, 512, 138]> input_209_cast_fp16 = pad(constant_val = const_47_to_fp16, mode = input_209_mode_0, pad = input_209_pad_0, x = x_99_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
tensor<string, []> x_101_pad_type_0 = const()[name = tensor<string, []>("x_101_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_101_groups_0 = const()[name = tensor<string, []>("x_101_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_101_strides_0 = const()[name = tensor<string, []>("x_101_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_101_pad_0 = const()[name = tensor<string, []>("x_101_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_101_dilations_0 = const()[name = tensor<string, []>("x_101_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_7_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48497216)))];
tensor<fp16, [1, 512, 128]> x_101_cast_fp16 = conv(dilations = x_101_dilations_0, groups = x_101_groups_0, pad = x_101_pad_0, pad_type = x_101_pad_type_0, strides = x_101_strides_0, weight = d_decoders_7_self_attn_fsmn_block_weight_to_fp16, x = input_209_cast_fp16)[name = tensor<string, []>("x_101_cast_fp16")];
tensor<int32, [3]> x_103_perm_0 = const()[name = tensor<string, []>("x_103_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_103_cast_fp16 = transpose(perm = x_103_perm_0, x = x_101_cast_fp16)[name = tensor<string, []>("transpose_132")];
tensor<fp16, [1, 128, 512]> input_211_cast_fp16 = add(x = x_103_cast_fp16, y = inputs_31_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_213_cast_fp16 = mul(x = input_211_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_215_cast_fp16 = add(x = input_195_cast_fp16, y = input_213_cast_fp16)[name = tensor<string, []>("input_215_cast_fp16")];
tensor<int32, [1]> x_109_axes_0 = const()[name = tensor<string, []>("x_109_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_7_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48508544)))];
tensor<fp16, [512]> d_decoders_7_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48509632)))];
tensor<fp16, [1, 128, 512]> x_109_cast_fp16 = layer_norm(axes = x_109_axes_0, beta = d_decoders_7_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_7_norm3_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("x_109_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_7_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48510720)))];
tensor<fp16, [512]> d_decoders_7_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49035072)))];
tensor<fp16, [1, 128, 512]> linear_37_cast_fp16 = linear(bias = d_decoders_7_src_attn_linear_q_bias_to_fp16, weight = d_decoders_7_src_attn_linear_q_weight_to_fp16, x = x_109_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
tensor<int32, [4]> var_793 = const()[name = tensor<string, []>("op_793"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_794_cast_fp16 = reshape(shape = var_793, x = linear_37_cast_fp16)[name = tensor<string, []>("op_794_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_7_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49036160)))];
tensor<fp16, [1024]> d_decoders_7_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50084800)))];
tensor<fp16, [1, 512, 1024]> linear_38_cast_fp16 = linear(bias = d_decoders_7_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_7_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_38_cast_fp16")];
tensor<int32, [2]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_799_axis_0 = const()[name = tensor<string, []>("op_799_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_799_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_799_cast_fp16_1 = split(axis = var_799_axis_0, split_sizes = tile_7, x = linear_38_cast_fp16)[name = tensor<string, []>("op_799_cast_fp16")];
tensor<int32, [4]> var_802 = const()[name = tensor<string, []>("op_802"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_803_cast_fp16 = reshape(shape = var_802, x = var_799_cast_fp16_0)[name = tensor<string, []>("op_803_cast_fp16")];
tensor<int32, [4]> var_805 = const()[name = tensor<string, []>("op_805"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_806_cast_fp16 = reshape(shape = var_805, x = var_799_cast_fp16_1)[name = tensor<string, []>("op_806_cast_fp16")];
tensor<int32, [4]> value_15_perm_0 = const()[name = tensor<string, []>("value_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_808_to_fp16 = const()[name = tensor<string, []>("op_808_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_31_cast_fp16 = mul(x = var_794_cast_fp16, y = var_808_to_fp16)[name = tensor<string, []>("q_h_31_cast_fp16")];
tensor<bool, []> scores_29_transpose_x_0 = const()[name = tensor<string, []>("scores_29_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_29_transpose_y_0 = const()[name = tensor<string, []>("scores_29_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_62_perm_0 = const()[name = tensor<string, []>("transpose_62_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_63_perm_0 = const()[name = tensor<string, []>("transpose_63_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_63 = transpose(perm = transpose_63_perm_0, x = var_803_cast_fp16)[name = tensor<string, []>("transpose_129")];
tensor<fp16, [1, 4, 128, 128]> transpose_62 = transpose(perm = transpose_62_perm_0, x = q_h_31_cast_fp16)[name = tensor<string, []>("transpose_130")];
tensor<fp16, [1, 4, 128, 512]> scores_29_cast_fp16 = matmul(transpose_x = scores_29_transpose_x_0, transpose_y = scores_29_transpose_y_0, x = transpose_62, y = transpose_63)[name = tensor<string, []>("scores_29_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_31_cast_fp16 = select(a = var_8_to_fp16, b = scores_29_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_31_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_816_cast_fp16 = softmax(axis = var_20, x = scores_31_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_217_cast_fp16 = select(a = var_9_to_fp16, b = var_816_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
tensor<bool, []> x_111_transpose_x_0 = const()[name = tensor<string, []>("x_111_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_111_transpose_y_0 = const()[name = tensor<string, []>("x_111_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_15_cast_fp16 = transpose(perm = value_15_perm_0, x = var_806_cast_fp16)[name = tensor<string, []>("transpose_131")];
tensor<fp16, [1, 4, 128, 128]> x_111_cast_fp16 = matmul(transpose_x = x_111_transpose_x_0, transpose_y = x_111_transpose_y_0, x = input_217_cast_fp16, y = value_15_cast_fp16)[name = tensor<string, []>("x_111_cast_fp16")];
tensor<int32, [4]> var_820_perm_0 = const()[name = tensor<string, []>("op_820_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_822 = const()[name = tensor<string, []>("op_822"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_820_cast_fp16 = transpose(perm = var_820_perm_0, x = x_111_cast_fp16)[name = tensor<string, []>("transpose_128")];
tensor<fp16, [1, 128, 512]> input_219_cast_fp16 = reshape(shape = var_822, x = var_820_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_7_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50086912)))];
tensor<fp16, [512]> d_decoders_7_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_7_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50611264)))];
tensor<fp16, [1, 128, 512]> linear_39_cast_fp16 = linear(bias = d_decoders_7_src_attn_linear_out_bias_to_fp16, weight = d_decoders_7_src_attn_linear_out_weight_to_fp16, x = input_219_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_223_cast_fp16 = add(x = input_215_cast_fp16, y = linear_39_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
tensor<int32, [1]> input_225_axes_0 = const()[name = tensor<string, []>("input_225_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_8_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50612352)))];
tensor<fp16, [512]> d_decoders_8_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50613440)))];
tensor<fp16, [1, 128, 512]> input_225_cast_fp16 = layer_norm(axes = input_225_axes_0, beta = d_decoders_8_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_norm1_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("input_225_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_8_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50614528)))];
tensor<fp16, [2048]> d_decoders_8_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52711744)))];
tensor<fp16, [1, 128, 2048]> linear_40_cast_fp16 = linear(bias = d_decoders_8_feed_forward_w_1_bias_to_fp16, weight = d_decoders_8_feed_forward_w_1_weight_to_fp16, x = input_225_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_229_cast_fp16 = relu(x = linear_40_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_8_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52715904)))];
tensor<fp16, [2048]> d_decoders_8_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52720064)))];
tensor<fp16, [1, 128, 2048]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = d_decoders_8_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_feed_forward_norm_weight_to_fp16, x = input_229_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_8_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52724224)))];
tensor<fp16, [1, 128, 512]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_8_feed_forward_w_2_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")];
tensor<int32, [1]> inputs_33_axes_0 = const()[name = tensor<string, []>("inputs_33_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_8_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54821440)))];
tensor<fp16, [512]> d_decoders_8_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54822528)))];
tensor<fp16, [1, 128, 512]> inputs_33_cast_fp16 = layer_norm(axes = inputs_33_axes_0, beta = d_decoders_8_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_norm2_weight_to_fp16, x = linear_41_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_35_cast_fp16 = mul(x = inputs_33_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
tensor<int32, [3]> x_113_perm_0 = const()[name = tensor<string, []>("x_113_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_237_pad_0 = const()[name = tensor<string, []>("input_237_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_237_mode_0 = const()[name = tensor<string, []>("input_237_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_53_to_fp16 = const()[name = tensor<string, []>("const_53_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_113_cast_fp16 = transpose(perm = x_113_perm_0, x = inputs_35_cast_fp16)[name = tensor<string, []>("transpose_127")];
tensor<fp16, [1, 512, 138]> input_237_cast_fp16 = pad(constant_val = const_53_to_fp16, mode = input_237_mode_0, pad = input_237_pad_0, x = x_113_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
tensor<string, []> x_115_pad_type_0 = const()[name = tensor<string, []>("x_115_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_115_groups_0 = const()[name = tensor<string, []>("x_115_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_115_strides_0 = const()[name = tensor<string, []>("x_115_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_115_pad_0 = const()[name = tensor<string, []>("x_115_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_115_dilations_0 = const()[name = tensor<string, []>("x_115_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_8_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54823616)))];
tensor<fp16, [1, 512, 128]> x_115_cast_fp16 = conv(dilations = x_115_dilations_0, groups = x_115_groups_0, pad = x_115_pad_0, pad_type = x_115_pad_type_0, strides = x_115_strides_0, weight = d_decoders_8_self_attn_fsmn_block_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("x_115_cast_fp16")];
tensor<int32, [3]> x_117_perm_0 = const()[name = tensor<string, []>("x_117_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_117_cast_fp16 = transpose(perm = x_117_perm_0, x = x_115_cast_fp16)[name = tensor<string, []>("transpose_126")];
tensor<fp16, [1, 128, 512]> input_239_cast_fp16 = add(x = x_117_cast_fp16, y = inputs_35_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_241_cast_fp16 = mul(x = input_239_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_243_cast_fp16 = add(x = input_223_cast_fp16, y = input_241_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
tensor<int32, [1]> x_123_axes_0 = const()[name = tensor<string, []>("x_123_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_8_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54834944)))];
tensor<fp16, [512]> d_decoders_8_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54836032)))];
tensor<fp16, [1, 128, 512]> x_123_cast_fp16 = layer_norm(axes = x_123_axes_0, beta = d_decoders_8_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_8_norm3_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("x_123_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_8_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54837120)))];
tensor<fp16, [512]> d_decoders_8_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55361472)))];
tensor<fp16, [1, 128, 512]> linear_42_cast_fp16 = linear(bias = d_decoders_8_src_attn_linear_q_bias_to_fp16, weight = d_decoders_8_src_attn_linear_q_weight_to_fp16, x = x_123_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")];
tensor<int32, [4]> var_888 = const()[name = tensor<string, []>("op_888"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_889_cast_fp16 = reshape(shape = var_888, x = linear_42_cast_fp16)[name = tensor<string, []>("op_889_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_8_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55362560)))];
tensor<fp16, [1024]> d_decoders_8_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56411200)))];
tensor<fp16, [1, 512, 1024]> linear_43_cast_fp16 = linear(bias = d_decoders_8_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_8_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_43_cast_fp16")];
tensor<int32, [2]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_894_axis_0 = const()[name = tensor<string, []>("op_894_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_894_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_894_cast_fp16_1 = split(axis = var_894_axis_0, split_sizes = tile_8, x = linear_43_cast_fp16)[name = tensor<string, []>("op_894_cast_fp16")];
tensor<int32, [4]> var_897 = const()[name = tensor<string, []>("op_897"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_898_cast_fp16 = reshape(shape = var_897, x = var_894_cast_fp16_0)[name = tensor<string, []>("op_898_cast_fp16")];
tensor<int32, [4]> var_900 = const()[name = tensor<string, []>("op_900"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_901_cast_fp16 = reshape(shape = var_900, x = var_894_cast_fp16_1)[name = tensor<string, []>("op_901_cast_fp16")];
tensor<int32, [4]> value_17_perm_0 = const()[name = tensor<string, []>("value_17_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_903_to_fp16 = const()[name = tensor<string, []>("op_903_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_35_cast_fp16 = mul(x = var_889_cast_fp16, y = var_903_to_fp16)[name = tensor<string, []>("q_h_35_cast_fp16")];
tensor<bool, []> scores_33_transpose_x_0 = const()[name = tensor<string, []>("scores_33_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_33_transpose_y_0 = const()[name = tensor<string, []>("scores_33_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_64_perm_0 = const()[name = tensor<string, []>("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_65_perm_0 = const()[name = tensor<string, []>("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_65 = transpose(perm = transpose_65_perm_0, x = var_898_cast_fp16)[name = tensor<string, []>("transpose_123")];
tensor<fp16, [1, 4, 128, 128]> transpose_64 = transpose(perm = transpose_64_perm_0, x = q_h_35_cast_fp16)[name = tensor<string, []>("transpose_124")];
tensor<fp16, [1, 4, 128, 512]> scores_33_cast_fp16 = matmul(transpose_x = scores_33_transpose_x_0, transpose_y = scores_33_transpose_y_0, x = transpose_64, y = transpose_65)[name = tensor<string, []>("scores_33_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_35_cast_fp16 = select(a = var_8_to_fp16, b = scores_33_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_35_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_911_cast_fp16 = softmax(axis = var_20, x = scores_35_cast_fp16)[name = tensor<string, []>("op_911_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_245_cast_fp16 = select(a = var_9_to_fp16, b = var_911_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_245_cast_fp16")];
tensor<bool, []> x_125_transpose_x_0 = const()[name = tensor<string, []>("x_125_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_125_transpose_y_0 = const()[name = tensor<string, []>("x_125_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_17_cast_fp16 = transpose(perm = value_17_perm_0, x = var_901_cast_fp16)[name = tensor<string, []>("transpose_125")];
tensor<fp16, [1, 4, 128, 128]> x_125_cast_fp16 = matmul(transpose_x = x_125_transpose_x_0, transpose_y = x_125_transpose_y_0, x = input_245_cast_fp16, y = value_17_cast_fp16)[name = tensor<string, []>("x_125_cast_fp16")];
tensor<int32, [4]> var_915_perm_0 = const()[name = tensor<string, []>("op_915_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_917 = const()[name = tensor<string, []>("op_917"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_915_cast_fp16 = transpose(perm = var_915_perm_0, x = x_125_cast_fp16)[name = tensor<string, []>("transpose_122")];
tensor<fp16, [1, 128, 512]> input_247_cast_fp16 = reshape(shape = var_917, x = var_915_cast_fp16)[name = tensor<string, []>("input_247_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_8_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56413312)))];
tensor<fp16, [512]> d_decoders_8_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_8_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56937664)))];
tensor<fp16, [1, 128, 512]> linear_44_cast_fp16 = linear(bias = d_decoders_8_src_attn_linear_out_bias_to_fp16, weight = d_decoders_8_src_attn_linear_out_weight_to_fp16, x = input_247_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_251_cast_fp16 = add(x = input_243_cast_fp16, y = linear_44_cast_fp16)[name = tensor<string, []>("input_251_cast_fp16")];
tensor<int32, [1]> input_253_axes_0 = const()[name = tensor<string, []>("input_253_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_9_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56938752)))];
tensor<fp16, [512]> d_decoders_9_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56939840)))];
tensor<fp16, [1, 128, 512]> input_253_cast_fp16 = layer_norm(axes = input_253_axes_0, beta = d_decoders_9_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_norm1_weight_to_fp16, x = input_251_cast_fp16)[name = tensor<string, []>("input_253_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_9_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56940928)))];
tensor<fp16, [2048]> d_decoders_9_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59038144)))];
tensor<fp16, [1, 128, 2048]> linear_45_cast_fp16 = linear(bias = d_decoders_9_feed_forward_w_1_bias_to_fp16, weight = d_decoders_9_feed_forward_w_1_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_257_cast_fp16 = relu(x = linear_45_cast_fp16)[name = tensor<string, []>("input_257_cast_fp16")];
tensor<int32, [1]> input_261_axes_0 = const()[name = tensor<string, []>("input_261_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_9_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59042304)))];
tensor<fp16, [2048]> d_decoders_9_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59046464)))];
tensor<fp16, [1, 128, 2048]> input_261_cast_fp16 = layer_norm(axes = input_261_axes_0, beta = d_decoders_9_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_feed_forward_norm_weight_to_fp16, x = input_257_cast_fp16)[name = tensor<string, []>("input_261_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_9_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59050624)))];
tensor<fp16, [1, 128, 512]> linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_9_feed_forward_w_2_weight_to_fp16, x = input_261_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")];
tensor<int32, [1]> inputs_37_axes_0 = const()[name = tensor<string, []>("inputs_37_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_9_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61147840)))];
tensor<fp16, [512]> d_decoders_9_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61148928)))];
tensor<fp16, [1, 128, 512]> inputs_37_cast_fp16 = layer_norm(axes = inputs_37_axes_0, beta = d_decoders_9_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_norm2_weight_to_fp16, x = linear_46_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_39_cast_fp16 = mul(x = inputs_37_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
tensor<int32, [3]> x_127_perm_0 = const()[name = tensor<string, []>("x_127_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_265_pad_0 = const()[name = tensor<string, []>("input_265_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_265_mode_0 = const()[name = tensor<string, []>("input_265_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_59_to_fp16 = const()[name = tensor<string, []>("const_59_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_127_cast_fp16 = transpose(perm = x_127_perm_0, x = inputs_39_cast_fp16)[name = tensor<string, []>("transpose_121")];
tensor<fp16, [1, 512, 138]> input_265_cast_fp16 = pad(constant_val = const_59_to_fp16, mode = input_265_mode_0, pad = input_265_pad_0, x = x_127_cast_fp16)[name = tensor<string, []>("input_265_cast_fp16")];
tensor<string, []> x_129_pad_type_0 = const()[name = tensor<string, []>("x_129_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_129_groups_0 = const()[name = tensor<string, []>("x_129_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_129_strides_0 = const()[name = tensor<string, []>("x_129_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_129_pad_0 = const()[name = tensor<string, []>("x_129_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_129_dilations_0 = const()[name = tensor<string, []>("x_129_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_9_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61150016)))];
tensor<fp16, [1, 512, 128]> x_129_cast_fp16 = conv(dilations = x_129_dilations_0, groups = x_129_groups_0, pad = x_129_pad_0, pad_type = x_129_pad_type_0, strides = x_129_strides_0, weight = d_decoders_9_self_attn_fsmn_block_weight_to_fp16, x = input_265_cast_fp16)[name = tensor<string, []>("x_129_cast_fp16")];
tensor<int32, [3]> x_131_perm_0 = const()[name = tensor<string, []>("x_131_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_131_cast_fp16 = transpose(perm = x_131_perm_0, x = x_129_cast_fp16)[name = tensor<string, []>("transpose_120")];
tensor<fp16, [1, 128, 512]> input_267_cast_fp16 = add(x = x_131_cast_fp16, y = inputs_39_cast_fp16)[name = tensor<string, []>("input_267_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_269_cast_fp16 = mul(x = input_267_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_269_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_271_cast_fp16 = add(x = input_251_cast_fp16, y = input_269_cast_fp16)[name = tensor<string, []>("input_271_cast_fp16")];
tensor<int32, [1]> x_137_axes_0 = const()[name = tensor<string, []>("x_137_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_9_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61161344)))];
tensor<fp16, [512]> d_decoders_9_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61162432)))];
tensor<fp16, [1, 128, 512]> x_137_cast_fp16 = layer_norm(axes = x_137_axes_0, beta = d_decoders_9_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_9_norm3_weight_to_fp16, x = input_271_cast_fp16)[name = tensor<string, []>("x_137_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_9_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61163520)))];
tensor<fp16, [512]> d_decoders_9_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61687872)))];
tensor<fp16, [1, 128, 512]> linear_47_cast_fp16 = linear(bias = d_decoders_9_src_attn_linear_q_bias_to_fp16, weight = d_decoders_9_src_attn_linear_q_weight_to_fp16, x = x_137_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")];
tensor<int32, [4]> var_983 = const()[name = tensor<string, []>("op_983"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_984_cast_fp16 = reshape(shape = var_983, x = linear_47_cast_fp16)[name = tensor<string, []>("op_984_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_9_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61688960)))];
tensor<fp16, [1024]> d_decoders_9_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62737600)))];
tensor<fp16, [1, 512, 1024]> linear_48_cast_fp16 = linear(bias = d_decoders_9_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_9_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_48_cast_fp16")];
tensor<int32, [2]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_989_axis_0 = const()[name = tensor<string, []>("op_989_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_989_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_989_cast_fp16_1 = split(axis = var_989_axis_0, split_sizes = tile_9, x = linear_48_cast_fp16)[name = tensor<string, []>("op_989_cast_fp16")];
tensor<int32, [4]> var_992 = const()[name = tensor<string, []>("op_992"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_993_cast_fp16 = reshape(shape = var_992, x = var_989_cast_fp16_0)[name = tensor<string, []>("op_993_cast_fp16")];
tensor<int32, [4]> var_995 = const()[name = tensor<string, []>("op_995"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_996_cast_fp16 = reshape(shape = var_995, x = var_989_cast_fp16_1)[name = tensor<string, []>("op_996_cast_fp16")];
tensor<int32, [4]> value_19_perm_0 = const()[name = tensor<string, []>("value_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_998_to_fp16 = const()[name = tensor<string, []>("op_998_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_39_cast_fp16 = mul(x = var_984_cast_fp16, y = var_998_to_fp16)[name = tensor<string, []>("q_h_39_cast_fp16")];
tensor<bool, []> scores_37_transpose_x_0 = const()[name = tensor<string, []>("scores_37_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_37_transpose_y_0 = const()[name = tensor<string, []>("scores_37_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_66_perm_0 = const()[name = tensor<string, []>("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_67_perm_0 = const()[name = tensor<string, []>("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_67 = transpose(perm = transpose_67_perm_0, x = var_993_cast_fp16)[name = tensor<string, []>("transpose_117")];
tensor<fp16, [1, 4, 128, 128]> transpose_66 = transpose(perm = transpose_66_perm_0, x = q_h_39_cast_fp16)[name = tensor<string, []>("transpose_118")];
tensor<fp16, [1, 4, 128, 512]> scores_37_cast_fp16 = matmul(transpose_x = scores_37_transpose_x_0, transpose_y = scores_37_transpose_y_0, x = transpose_66, y = transpose_67)[name = tensor<string, []>("scores_37_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_39_cast_fp16 = select(a = var_8_to_fp16, b = scores_37_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_39_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1006_cast_fp16 = softmax(axis = var_20, x = scores_39_cast_fp16)[name = tensor<string, []>("op_1006_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_273_cast_fp16 = select(a = var_9_to_fp16, b = var_1006_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_273_cast_fp16")];
tensor<bool, []> x_139_transpose_x_0 = const()[name = tensor<string, []>("x_139_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_139_transpose_y_0 = const()[name = tensor<string, []>("x_139_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_19_cast_fp16 = transpose(perm = value_19_perm_0, x = var_996_cast_fp16)[name = tensor<string, []>("transpose_119")];
tensor<fp16, [1, 4, 128, 128]> x_139_cast_fp16 = matmul(transpose_x = x_139_transpose_x_0, transpose_y = x_139_transpose_y_0, x = input_273_cast_fp16, y = value_19_cast_fp16)[name = tensor<string, []>("x_139_cast_fp16")];
tensor<int32, [4]> var_1010_perm_0 = const()[name = tensor<string, []>("op_1010_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1012 = const()[name = tensor<string, []>("op_1012"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1010_cast_fp16 = transpose(perm = var_1010_perm_0, x = x_139_cast_fp16)[name = tensor<string, []>("transpose_116")];
tensor<fp16, [1, 128, 512]> input_275_cast_fp16 = reshape(shape = var_1012, x = var_1010_cast_fp16)[name = tensor<string, []>("input_275_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_9_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62739712)))];
tensor<fp16, [512]> d_decoders_9_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_9_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63264064)))];
tensor<fp16, [1, 128, 512]> linear_49_cast_fp16 = linear(bias = d_decoders_9_src_attn_linear_out_bias_to_fp16, weight = d_decoders_9_src_attn_linear_out_weight_to_fp16, x = input_275_cast_fp16)[name = tensor<string, []>("linear_49_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_279_cast_fp16 = add(x = input_271_cast_fp16, y = linear_49_cast_fp16)[name = tensor<string, []>("input_279_cast_fp16")];
tensor<int32, [1]> input_281_axes_0 = const()[name = tensor<string, []>("input_281_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_10_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63265152)))];
tensor<fp16, [512]> d_decoders_10_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63266240)))];
tensor<fp16, [1, 128, 512]> input_281_cast_fp16 = layer_norm(axes = input_281_axes_0, beta = d_decoders_10_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_norm1_weight_to_fp16, x = input_279_cast_fp16)[name = tensor<string, []>("input_281_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_10_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63267328)))];
tensor<fp16, [2048]> d_decoders_10_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65364544)))];
tensor<fp16, [1, 128, 2048]> linear_50_cast_fp16 = linear(bias = d_decoders_10_feed_forward_w_1_bias_to_fp16, weight = d_decoders_10_feed_forward_w_1_weight_to_fp16, x = input_281_cast_fp16)[name = tensor<string, []>("linear_50_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_285_cast_fp16 = relu(x = linear_50_cast_fp16)[name = tensor<string, []>("input_285_cast_fp16")];
tensor<int32, [1]> input_289_axes_0 = const()[name = tensor<string, []>("input_289_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_10_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65368704)))];
tensor<fp16, [2048]> d_decoders_10_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65372864)))];
tensor<fp16, [1, 128, 2048]> input_289_cast_fp16 = layer_norm(axes = input_289_axes_0, beta = d_decoders_10_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_feed_forward_norm_weight_to_fp16, x = input_285_cast_fp16)[name = tensor<string, []>("input_289_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_10_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65377024)))];
tensor<fp16, [1, 128, 512]> linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_10_feed_forward_w_2_weight_to_fp16, x = input_289_cast_fp16)[name = tensor<string, []>("linear_51_cast_fp16")];
tensor<int32, [1]> inputs_41_axes_0 = const()[name = tensor<string, []>("inputs_41_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_10_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67474240)))];
tensor<fp16, [512]> d_decoders_10_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67475328)))];
tensor<fp16, [1, 128, 512]> inputs_41_cast_fp16 = layer_norm(axes = inputs_41_axes_0, beta = d_decoders_10_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_norm2_weight_to_fp16, x = linear_51_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_43_cast_fp16 = mul(x = inputs_41_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
tensor<int32, [3]> x_141_perm_0 = const()[name = tensor<string, []>("x_141_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_293_pad_0 = const()[name = tensor<string, []>("input_293_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_293_mode_0 = const()[name = tensor<string, []>("input_293_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_65_to_fp16 = const()[name = tensor<string, []>("const_65_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_141_cast_fp16 = transpose(perm = x_141_perm_0, x = inputs_43_cast_fp16)[name = tensor<string, []>("transpose_115")];
tensor<fp16, [1, 512, 138]> input_293_cast_fp16 = pad(constant_val = const_65_to_fp16, mode = input_293_mode_0, pad = input_293_pad_0, x = x_141_cast_fp16)[name = tensor<string, []>("input_293_cast_fp16")];
tensor<string, []> x_143_pad_type_0 = const()[name = tensor<string, []>("x_143_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_143_groups_0 = const()[name = tensor<string, []>("x_143_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_143_strides_0 = const()[name = tensor<string, []>("x_143_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_143_pad_0 = const()[name = tensor<string, []>("x_143_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_143_dilations_0 = const()[name = tensor<string, []>("x_143_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_10_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67476416)))];
tensor<fp16, [1, 512, 128]> x_143_cast_fp16 = conv(dilations = x_143_dilations_0, groups = x_143_groups_0, pad = x_143_pad_0, pad_type = x_143_pad_type_0, strides = x_143_strides_0, weight = d_decoders_10_self_attn_fsmn_block_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("x_143_cast_fp16")];
tensor<int32, [3]> x_145_perm_0 = const()[name = tensor<string, []>("x_145_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_145_cast_fp16 = transpose(perm = x_145_perm_0, x = x_143_cast_fp16)[name = tensor<string, []>("transpose_114")];
tensor<fp16, [1, 128, 512]> input_295_cast_fp16 = add(x = x_145_cast_fp16, y = inputs_43_cast_fp16)[name = tensor<string, []>("input_295_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_297_cast_fp16 = mul(x = input_295_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_297_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_299_cast_fp16 = add(x = input_279_cast_fp16, y = input_297_cast_fp16)[name = tensor<string, []>("input_299_cast_fp16")];
tensor<int32, [1]> x_151_axes_0 = const()[name = tensor<string, []>("x_151_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_10_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67487744)))];
tensor<fp16, [512]> d_decoders_10_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67488832)))];
tensor<fp16, [1, 128, 512]> x_151_cast_fp16 = layer_norm(axes = x_151_axes_0, beta = d_decoders_10_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_10_norm3_weight_to_fp16, x = input_299_cast_fp16)[name = tensor<string, []>("x_151_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_10_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67489920)))];
tensor<fp16, [512]> d_decoders_10_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68014272)))];
tensor<fp16, [1, 128, 512]> linear_52_cast_fp16 = linear(bias = d_decoders_10_src_attn_linear_q_bias_to_fp16, weight = d_decoders_10_src_attn_linear_q_weight_to_fp16, x = x_151_cast_fp16)[name = tensor<string, []>("linear_52_cast_fp16")];
tensor<int32, [4]> var_1078 = const()[name = tensor<string, []>("op_1078"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_1079_cast_fp16 = reshape(shape = var_1078, x = linear_52_cast_fp16)[name = tensor<string, []>("op_1079_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_10_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68015360)))];
tensor<fp16, [1024]> d_decoders_10_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69064000)))];
tensor<fp16, [1, 512, 1024]> linear_53_cast_fp16 = linear(bias = d_decoders_10_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_10_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_53_cast_fp16")];
tensor<int32, [2]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_1084_axis_0 = const()[name = tensor<string, []>("op_1084_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_1084_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1084_cast_fp16_1 = split(axis = var_1084_axis_0, split_sizes = tile_10, x = linear_53_cast_fp16)[name = tensor<string, []>("op_1084_cast_fp16")];
tensor<int32, [4]> var_1087 = const()[name = tensor<string, []>("op_1087"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1088_cast_fp16 = reshape(shape = var_1087, x = var_1084_cast_fp16_0)[name = tensor<string, []>("op_1088_cast_fp16")];
tensor<int32, [4]> var_1090 = const()[name = tensor<string, []>("op_1090"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1091_cast_fp16 = reshape(shape = var_1090, x = var_1084_cast_fp16_1)[name = tensor<string, []>("op_1091_cast_fp16")];
tensor<int32, [4]> value_21_perm_0 = const()[name = tensor<string, []>("value_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_1093_to_fp16 = const()[name = tensor<string, []>("op_1093_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_43_cast_fp16 = mul(x = var_1079_cast_fp16, y = var_1093_to_fp16)[name = tensor<string, []>("q_h_43_cast_fp16")];
tensor<bool, []> scores_41_transpose_x_0 = const()[name = tensor<string, []>("scores_41_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_41_transpose_y_0 = const()[name = tensor<string, []>("scores_41_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_68_perm_0 = const()[name = tensor<string, []>("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_69_perm_0 = const()[name = tensor<string, []>("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_69 = transpose(perm = transpose_69_perm_0, x = var_1088_cast_fp16)[name = tensor<string, []>("transpose_111")];
tensor<fp16, [1, 4, 128, 128]> transpose_68 = transpose(perm = transpose_68_perm_0, x = q_h_43_cast_fp16)[name = tensor<string, []>("transpose_112")];
tensor<fp16, [1, 4, 128, 512]> scores_41_cast_fp16 = matmul(transpose_x = scores_41_transpose_x_0, transpose_y = scores_41_transpose_y_0, x = transpose_68, y = transpose_69)[name = tensor<string, []>("scores_41_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_43_cast_fp16 = select(a = var_8_to_fp16, b = scores_41_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_43_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1101_cast_fp16 = softmax(axis = var_20, x = scores_43_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_301_cast_fp16 = select(a = var_9_to_fp16, b = var_1101_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_301_cast_fp16")];
tensor<bool, []> x_153_transpose_x_0 = const()[name = tensor<string, []>("x_153_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_153_transpose_y_0 = const()[name = tensor<string, []>("x_153_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_21_cast_fp16 = transpose(perm = value_21_perm_0, x = var_1091_cast_fp16)[name = tensor<string, []>("transpose_113")];
tensor<fp16, [1, 4, 128, 128]> x_153_cast_fp16 = matmul(transpose_x = x_153_transpose_x_0, transpose_y = x_153_transpose_y_0, x = input_301_cast_fp16, y = value_21_cast_fp16)[name = tensor<string, []>("x_153_cast_fp16")];
tensor<int32, [4]> var_1105_perm_0 = const()[name = tensor<string, []>("op_1105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1107 = const()[name = tensor<string, []>("op_1107"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1105_cast_fp16 = transpose(perm = var_1105_perm_0, x = x_153_cast_fp16)[name = tensor<string, []>("transpose_110")];
tensor<fp16, [1, 128, 512]> input_303_cast_fp16 = reshape(shape = var_1107, x = var_1105_cast_fp16)[name = tensor<string, []>("input_303_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_10_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69066112)))];
tensor<fp16, [512]> d_decoders_10_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_10_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69590464)))];
tensor<fp16, [1, 128, 512]> linear_54_cast_fp16 = linear(bias = d_decoders_10_src_attn_linear_out_bias_to_fp16, weight = d_decoders_10_src_attn_linear_out_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("linear_54_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_307_cast_fp16 = add(x = input_299_cast_fp16, y = linear_54_cast_fp16)[name = tensor<string, []>("input_307_cast_fp16")];
tensor<int32, [1]> input_309_axes_0 = const()[name = tensor<string, []>("input_309_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_11_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69591552)))];
tensor<fp16, [512]> d_decoders_11_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69592640)))];
tensor<fp16, [1, 128, 512]> input_309_cast_fp16 = layer_norm(axes = input_309_axes_0, beta = d_decoders_11_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_norm1_weight_to_fp16, x = input_307_cast_fp16)[name = tensor<string, []>("input_309_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_11_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69593728)))];
tensor<fp16, [2048]> d_decoders_11_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(71690944)))];
tensor<fp16, [1, 128, 2048]> linear_55_cast_fp16 = linear(bias = d_decoders_11_feed_forward_w_1_bias_to_fp16, weight = d_decoders_11_feed_forward_w_1_weight_to_fp16, x = input_309_cast_fp16)[name = tensor<string, []>("linear_55_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_313_cast_fp16 = relu(x = linear_55_cast_fp16)[name = tensor<string, []>("input_313_cast_fp16")];
tensor<int32, [1]> input_317_axes_0 = const()[name = tensor<string, []>("input_317_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_11_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(71695104)))];
tensor<fp16, [2048]> d_decoders_11_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(71699264)))];
tensor<fp16, [1, 128, 2048]> input_317_cast_fp16 = layer_norm(axes = input_317_axes_0, beta = d_decoders_11_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_feed_forward_norm_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("input_317_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_11_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(71703424)))];
tensor<fp16, [1, 128, 512]> linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_11_feed_forward_w_2_weight_to_fp16, x = input_317_cast_fp16)[name = tensor<string, []>("linear_56_cast_fp16")];
tensor<int32, [1]> inputs_45_axes_0 = const()[name = tensor<string, []>("inputs_45_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_11_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73800640)))];
tensor<fp16, [512]> d_decoders_11_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73801728)))];
tensor<fp16, [1, 128, 512]> inputs_45_cast_fp16 = layer_norm(axes = inputs_45_axes_0, beta = d_decoders_11_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_norm2_weight_to_fp16, x = linear_56_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_47_cast_fp16 = mul(x = inputs_45_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
tensor<int32, [3]> x_155_perm_0 = const()[name = tensor<string, []>("x_155_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_321_pad_0 = const()[name = tensor<string, []>("input_321_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_321_mode_0 = const()[name = tensor<string, []>("input_321_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_71_to_fp16 = const()[name = tensor<string, []>("const_71_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_155_cast_fp16 = transpose(perm = x_155_perm_0, x = inputs_47_cast_fp16)[name = tensor<string, []>("transpose_109")];
tensor<fp16, [1, 512, 138]> input_321_cast_fp16 = pad(constant_val = const_71_to_fp16, mode = input_321_mode_0, pad = input_321_pad_0, x = x_155_cast_fp16)[name = tensor<string, []>("input_321_cast_fp16")];
tensor<string, []> x_157_pad_type_0 = const()[name = tensor<string, []>("x_157_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_157_groups_0 = const()[name = tensor<string, []>("x_157_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_157_strides_0 = const()[name = tensor<string, []>("x_157_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_157_pad_0 = const()[name = tensor<string, []>("x_157_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_157_dilations_0 = const()[name = tensor<string, []>("x_157_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_11_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73802816)))];
tensor<fp16, [1, 512, 128]> x_157_cast_fp16 = conv(dilations = x_157_dilations_0, groups = x_157_groups_0, pad = x_157_pad_0, pad_type = x_157_pad_type_0, strides = x_157_strides_0, weight = d_decoders_11_self_attn_fsmn_block_weight_to_fp16, x = input_321_cast_fp16)[name = tensor<string, []>("x_157_cast_fp16")];
tensor<int32, [3]> x_159_perm_0 = const()[name = tensor<string, []>("x_159_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_159_cast_fp16 = transpose(perm = x_159_perm_0, x = x_157_cast_fp16)[name = tensor<string, []>("transpose_108")];
tensor<fp16, [1, 128, 512]> input_323_cast_fp16 = add(x = x_159_cast_fp16, y = inputs_47_cast_fp16)[name = tensor<string, []>("input_323_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_325_cast_fp16 = mul(x = input_323_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_325_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_327_cast_fp16 = add(x = input_307_cast_fp16, y = input_325_cast_fp16)[name = tensor<string, []>("input_327_cast_fp16")];
tensor<int32, [1]> x_165_axes_0 = const()[name = tensor<string, []>("x_165_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_11_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73814144)))];
tensor<fp16, [512]> d_decoders_11_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73815232)))];
tensor<fp16, [1, 128, 512]> x_165_cast_fp16 = layer_norm(axes = x_165_axes_0, beta = d_decoders_11_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_11_norm3_weight_to_fp16, x = input_327_cast_fp16)[name = tensor<string, []>("x_165_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_11_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73816320)))];
tensor<fp16, [512]> d_decoders_11_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74340672)))];
tensor<fp16, [1, 128, 512]> linear_57_cast_fp16 = linear(bias = d_decoders_11_src_attn_linear_q_bias_to_fp16, weight = d_decoders_11_src_attn_linear_q_weight_to_fp16, x = x_165_cast_fp16)[name = tensor<string, []>("linear_57_cast_fp16")];
tensor<int32, [4]> var_1173 = const()[name = tensor<string, []>("op_1173"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_1174_cast_fp16 = reshape(shape = var_1173, x = linear_57_cast_fp16)[name = tensor<string, []>("op_1174_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_11_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74341760)))];
tensor<fp16, [1024]> d_decoders_11_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75390400)))];
tensor<fp16, [1, 512, 1024]> linear_58_cast_fp16 = linear(bias = d_decoders_11_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_11_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_58_cast_fp16")];
tensor<int32, [2]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_1179_axis_0 = const()[name = tensor<string, []>("op_1179_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_1179_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1179_cast_fp16_1 = split(axis = var_1179_axis_0, split_sizes = tile_11, x = linear_58_cast_fp16)[name = tensor<string, []>("op_1179_cast_fp16")];
tensor<int32, [4]> var_1182 = const()[name = tensor<string, []>("op_1182"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1183_cast_fp16 = reshape(shape = var_1182, x = var_1179_cast_fp16_0)[name = tensor<string, []>("op_1183_cast_fp16")];
tensor<int32, [4]> var_1185 = const()[name = tensor<string, []>("op_1185"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1186_cast_fp16 = reshape(shape = var_1185, x = var_1179_cast_fp16_1)[name = tensor<string, []>("op_1186_cast_fp16")];
tensor<int32, [4]> value_23_perm_0 = const()[name = tensor<string, []>("value_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_1188_to_fp16 = const()[name = tensor<string, []>("op_1188_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_47_cast_fp16 = mul(x = var_1174_cast_fp16, y = var_1188_to_fp16)[name = tensor<string, []>("q_h_47_cast_fp16")];
tensor<bool, []> scores_45_transpose_x_0 = const()[name = tensor<string, []>("scores_45_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_45_transpose_y_0 = const()[name = tensor<string, []>("scores_45_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_70_perm_0 = const()[name = tensor<string, []>("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_71_perm_0 = const()[name = tensor<string, []>("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_71 = transpose(perm = transpose_71_perm_0, x = var_1183_cast_fp16)[name = tensor<string, []>("transpose_105")];
tensor<fp16, [1, 4, 128, 128]> transpose_70 = transpose(perm = transpose_70_perm_0, x = q_h_47_cast_fp16)[name = tensor<string, []>("transpose_106")];
tensor<fp16, [1, 4, 128, 512]> scores_45_cast_fp16 = matmul(transpose_x = scores_45_transpose_x_0, transpose_y = scores_45_transpose_y_0, x = transpose_70, y = transpose_71)[name = tensor<string, []>("scores_45_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_47_cast_fp16 = select(a = var_8_to_fp16, b = scores_45_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_47_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1196_cast_fp16 = softmax(axis = var_20, x = scores_47_cast_fp16)[name = tensor<string, []>("op_1196_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_329_cast_fp16 = select(a = var_9_to_fp16, b = var_1196_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_329_cast_fp16")];
tensor<bool, []> x_167_transpose_x_0 = const()[name = tensor<string, []>("x_167_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_167_transpose_y_0 = const()[name = tensor<string, []>("x_167_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_23_cast_fp16 = transpose(perm = value_23_perm_0, x = var_1186_cast_fp16)[name = tensor<string, []>("transpose_107")];
tensor<fp16, [1, 4, 128, 128]> x_167_cast_fp16 = matmul(transpose_x = x_167_transpose_x_0, transpose_y = x_167_transpose_y_0, x = input_329_cast_fp16, y = value_23_cast_fp16)[name = tensor<string, []>("x_167_cast_fp16")];
tensor<int32, [4]> var_1200_perm_0 = const()[name = tensor<string, []>("op_1200_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1202 = const()[name = tensor<string, []>("op_1202"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1200_cast_fp16 = transpose(perm = var_1200_perm_0, x = x_167_cast_fp16)[name = tensor<string, []>("transpose_104")];
tensor<fp16, [1, 128, 512]> input_331_cast_fp16 = reshape(shape = var_1202, x = var_1200_cast_fp16)[name = tensor<string, []>("input_331_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_11_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75392512)))];
tensor<fp16, [512]> d_decoders_11_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_11_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75916864)))];
tensor<fp16, [1, 128, 512]> linear_59_cast_fp16 = linear(bias = d_decoders_11_src_attn_linear_out_bias_to_fp16, weight = d_decoders_11_src_attn_linear_out_weight_to_fp16, x = input_331_cast_fp16)[name = tensor<string, []>("linear_59_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_335_cast_fp16 = add(x = input_327_cast_fp16, y = linear_59_cast_fp16)[name = tensor<string, []>("input_335_cast_fp16")];
tensor<int32, [1]> input_337_axes_0 = const()[name = tensor<string, []>("input_337_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_12_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75917952)))];
tensor<fp16, [512]> d_decoders_12_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75919040)))];
tensor<fp16, [1, 128, 512]> input_337_cast_fp16 = layer_norm(axes = input_337_axes_0, beta = d_decoders_12_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_norm1_weight_to_fp16, x = input_335_cast_fp16)[name = tensor<string, []>("input_337_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_12_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75920128)))];
tensor<fp16, [2048]> d_decoders_12_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78017344)))];
tensor<fp16, [1, 128, 2048]> linear_60_cast_fp16 = linear(bias = d_decoders_12_feed_forward_w_1_bias_to_fp16, weight = d_decoders_12_feed_forward_w_1_weight_to_fp16, x = input_337_cast_fp16)[name = tensor<string, []>("linear_60_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_341_cast_fp16 = relu(x = linear_60_cast_fp16)[name = tensor<string, []>("input_341_cast_fp16")];
tensor<int32, [1]> input_345_axes_0 = const()[name = tensor<string, []>("input_345_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_12_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78021504)))];
tensor<fp16, [2048]> d_decoders_12_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78025664)))];
tensor<fp16, [1, 128, 2048]> input_345_cast_fp16 = layer_norm(axes = input_345_axes_0, beta = d_decoders_12_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_feed_forward_norm_weight_to_fp16, x = input_341_cast_fp16)[name = tensor<string, []>("input_345_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_12_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78029824)))];
tensor<fp16, [1, 128, 512]> linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_12_feed_forward_w_2_weight_to_fp16, x = input_345_cast_fp16)[name = tensor<string, []>("linear_61_cast_fp16")];
tensor<int32, [1]> inputs_49_axes_0 = const()[name = tensor<string, []>("inputs_49_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_12_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80127040)))];
tensor<fp16, [512]> d_decoders_12_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80128128)))];
tensor<fp16, [1, 128, 512]> inputs_49_cast_fp16 = layer_norm(axes = inputs_49_axes_0, beta = d_decoders_12_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_norm2_weight_to_fp16, x = linear_61_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_51_cast_fp16 = mul(x = inputs_49_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
tensor<int32, [3]> x_169_perm_0 = const()[name = tensor<string, []>("x_169_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_349_pad_0 = const()[name = tensor<string, []>("input_349_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_349_mode_0 = const()[name = tensor<string, []>("input_349_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_77_to_fp16 = const()[name = tensor<string, []>("const_77_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_169_cast_fp16 = transpose(perm = x_169_perm_0, x = inputs_51_cast_fp16)[name = tensor<string, []>("transpose_103")];
tensor<fp16, [1, 512, 138]> input_349_cast_fp16 = pad(constant_val = const_77_to_fp16, mode = input_349_mode_0, pad = input_349_pad_0, x = x_169_cast_fp16)[name = tensor<string, []>("input_349_cast_fp16")];
tensor<string, []> x_171_pad_type_0 = const()[name = tensor<string, []>("x_171_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_171_groups_0 = const()[name = tensor<string, []>("x_171_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_171_strides_0 = const()[name = tensor<string, []>("x_171_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_171_pad_0 = const()[name = tensor<string, []>("x_171_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_171_dilations_0 = const()[name = tensor<string, []>("x_171_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_12_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80129216)))];
tensor<fp16, [1, 512, 128]> x_171_cast_fp16 = conv(dilations = x_171_dilations_0, groups = x_171_groups_0, pad = x_171_pad_0, pad_type = x_171_pad_type_0, strides = x_171_strides_0, weight = d_decoders_12_self_attn_fsmn_block_weight_to_fp16, x = input_349_cast_fp16)[name = tensor<string, []>("x_171_cast_fp16")];
tensor<int32, [3]> x_173_perm_0 = const()[name = tensor<string, []>("x_173_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_173_cast_fp16 = transpose(perm = x_173_perm_0, x = x_171_cast_fp16)[name = tensor<string, []>("transpose_102")];
tensor<fp16, [1, 128, 512]> input_351_cast_fp16 = add(x = x_173_cast_fp16, y = inputs_51_cast_fp16)[name = tensor<string, []>("input_351_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_353_cast_fp16 = mul(x = input_351_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_353_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_355_cast_fp16 = add(x = input_335_cast_fp16, y = input_353_cast_fp16)[name = tensor<string, []>("input_355_cast_fp16")];
tensor<int32, [1]> x_179_axes_0 = const()[name = tensor<string, []>("x_179_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_12_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80140544)))];
tensor<fp16, [512]> d_decoders_12_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80141632)))];
tensor<fp16, [1, 128, 512]> x_179_cast_fp16 = layer_norm(axes = x_179_axes_0, beta = d_decoders_12_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_12_norm3_weight_to_fp16, x = input_355_cast_fp16)[name = tensor<string, []>("x_179_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_12_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80142720)))];
tensor<fp16, [512]> d_decoders_12_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80667072)))];
tensor<fp16, [1, 128, 512]> linear_62_cast_fp16 = linear(bias = d_decoders_12_src_attn_linear_q_bias_to_fp16, weight = d_decoders_12_src_attn_linear_q_weight_to_fp16, x = x_179_cast_fp16)[name = tensor<string, []>("linear_62_cast_fp16")];
tensor<int32, [4]> var_1268 = const()[name = tensor<string, []>("op_1268"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_1269_cast_fp16 = reshape(shape = var_1268, x = linear_62_cast_fp16)[name = tensor<string, []>("op_1269_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_12_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80668160)))];
tensor<fp16, [1024]> d_decoders_12_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81716800)))];
tensor<fp16, [1, 512, 1024]> linear_63_cast_fp16 = linear(bias = d_decoders_12_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_12_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_63_cast_fp16")];
tensor<int32, [2]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_1274_axis_0 = const()[name = tensor<string, []>("op_1274_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_1274_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1274_cast_fp16_1 = split(axis = var_1274_axis_0, split_sizes = tile_12, x = linear_63_cast_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
tensor<int32, [4]> var_1277 = const()[name = tensor<string, []>("op_1277"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1278_cast_fp16 = reshape(shape = var_1277, x = var_1274_cast_fp16_0)[name = tensor<string, []>("op_1278_cast_fp16")];
tensor<int32, [4]> var_1280 = const()[name = tensor<string, []>("op_1280"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1281_cast_fp16 = reshape(shape = var_1280, x = var_1274_cast_fp16_1)[name = tensor<string, []>("op_1281_cast_fp16")];
tensor<int32, [4]> value_25_perm_0 = const()[name = tensor<string, []>("value_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_1283_to_fp16 = const()[name = tensor<string, []>("op_1283_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_51_cast_fp16 = mul(x = var_1269_cast_fp16, y = var_1283_to_fp16)[name = tensor<string, []>("q_h_51_cast_fp16")];
tensor<bool, []> scores_49_transpose_x_0 = const()[name = tensor<string, []>("scores_49_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_49_transpose_y_0 = const()[name = tensor<string, []>("scores_49_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_72_perm_0 = const()[name = tensor<string, []>("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_73_perm_0 = const()[name = tensor<string, []>("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_73 = transpose(perm = transpose_73_perm_0, x = var_1278_cast_fp16)[name = tensor<string, []>("transpose_99")];
tensor<fp16, [1, 4, 128, 128]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_h_51_cast_fp16)[name = tensor<string, []>("transpose_100")];
tensor<fp16, [1, 4, 128, 512]> scores_49_cast_fp16 = matmul(transpose_x = scores_49_transpose_x_0, transpose_y = scores_49_transpose_y_0, x = transpose_72, y = transpose_73)[name = tensor<string, []>("scores_49_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_51_cast_fp16 = select(a = var_8_to_fp16, b = scores_49_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_51_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1291_cast_fp16 = softmax(axis = var_20, x = scores_51_cast_fp16)[name = tensor<string, []>("op_1291_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_357_cast_fp16 = select(a = var_9_to_fp16, b = var_1291_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_357_cast_fp16")];
tensor<bool, []> x_181_transpose_x_0 = const()[name = tensor<string, []>("x_181_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_181_transpose_y_0 = const()[name = tensor<string, []>("x_181_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_25_cast_fp16 = transpose(perm = value_25_perm_0, x = var_1281_cast_fp16)[name = tensor<string, []>("transpose_101")];
tensor<fp16, [1, 4, 128, 128]> x_181_cast_fp16 = matmul(transpose_x = x_181_transpose_x_0, transpose_y = x_181_transpose_y_0, x = input_357_cast_fp16, y = value_25_cast_fp16)[name = tensor<string, []>("x_181_cast_fp16")];
tensor<int32, [4]> var_1295_perm_0 = const()[name = tensor<string, []>("op_1295_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1297 = const()[name = tensor<string, []>("op_1297"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1295_cast_fp16 = transpose(perm = var_1295_perm_0, x = x_181_cast_fp16)[name = tensor<string, []>("transpose_98")];
tensor<fp16, [1, 128, 512]> input_359_cast_fp16 = reshape(shape = var_1297, x = var_1295_cast_fp16)[name = tensor<string, []>("input_359_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_12_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81718912)))];
tensor<fp16, [512]> d_decoders_12_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_12_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82243264)))];
tensor<fp16, [1, 128, 512]> linear_64_cast_fp16 = linear(bias = d_decoders_12_src_attn_linear_out_bias_to_fp16, weight = d_decoders_12_src_attn_linear_out_weight_to_fp16, x = input_359_cast_fp16)[name = tensor<string, []>("linear_64_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_363_cast_fp16 = add(x = input_355_cast_fp16, y = linear_64_cast_fp16)[name = tensor<string, []>("input_363_cast_fp16")];
tensor<int32, [1]> input_365_axes_0 = const()[name = tensor<string, []>("input_365_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_13_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82244352)))];
tensor<fp16, [512]> d_decoders_13_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82245440)))];
tensor<fp16, [1, 128, 512]> input_365_cast_fp16 = layer_norm(axes = input_365_axes_0, beta = d_decoders_13_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_norm1_weight_to_fp16, x = input_363_cast_fp16)[name = tensor<string, []>("input_365_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_13_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82246528)))];
tensor<fp16, [2048]> d_decoders_13_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84343744)))];
tensor<fp16, [1, 128, 2048]> linear_65_cast_fp16 = linear(bias = d_decoders_13_feed_forward_w_1_bias_to_fp16, weight = d_decoders_13_feed_forward_w_1_weight_to_fp16, x = input_365_cast_fp16)[name = tensor<string, []>("linear_65_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_369_cast_fp16 = relu(x = linear_65_cast_fp16)[name = tensor<string, []>("input_369_cast_fp16")];
tensor<int32, [1]> input_373_axes_0 = const()[name = tensor<string, []>("input_373_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_13_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84347904)))];
tensor<fp16, [2048]> d_decoders_13_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84352064)))];
tensor<fp16, [1, 128, 2048]> input_373_cast_fp16 = layer_norm(axes = input_373_axes_0, beta = d_decoders_13_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_feed_forward_norm_weight_to_fp16, x = input_369_cast_fp16)[name = tensor<string, []>("input_373_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_13_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84356224)))];
tensor<fp16, [1, 128, 512]> linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_13_feed_forward_w_2_weight_to_fp16, x = input_373_cast_fp16)[name = tensor<string, []>("linear_66_cast_fp16")];
tensor<int32, [1]> inputs_53_axes_0 = const()[name = tensor<string, []>("inputs_53_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_13_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86453440)))];
tensor<fp16, [512]> d_decoders_13_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86454528)))];
tensor<fp16, [1, 128, 512]> inputs_53_cast_fp16 = layer_norm(axes = inputs_53_axes_0, beta = d_decoders_13_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_norm2_weight_to_fp16, x = linear_66_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_55_cast_fp16 = mul(x = inputs_53_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
tensor<int32, [3]> x_183_perm_0 = const()[name = tensor<string, []>("x_183_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_377_pad_0 = const()[name = tensor<string, []>("input_377_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_377_mode_0 = const()[name = tensor<string, []>("input_377_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_83_to_fp16 = const()[name = tensor<string, []>("const_83_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_183_cast_fp16 = transpose(perm = x_183_perm_0, x = inputs_55_cast_fp16)[name = tensor<string, []>("transpose_97")];
tensor<fp16, [1, 512, 138]> input_377_cast_fp16 = pad(constant_val = const_83_to_fp16, mode = input_377_mode_0, pad = input_377_pad_0, x = x_183_cast_fp16)[name = tensor<string, []>("input_377_cast_fp16")];
tensor<string, []> x_185_pad_type_0 = const()[name = tensor<string, []>("x_185_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_185_groups_0 = const()[name = tensor<string, []>("x_185_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_185_strides_0 = const()[name = tensor<string, []>("x_185_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_185_pad_0 = const()[name = tensor<string, []>("x_185_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_185_dilations_0 = const()[name = tensor<string, []>("x_185_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_13_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86455616)))];
tensor<fp16, [1, 512, 128]> x_185_cast_fp16 = conv(dilations = x_185_dilations_0, groups = x_185_groups_0, pad = x_185_pad_0, pad_type = x_185_pad_type_0, strides = x_185_strides_0, weight = d_decoders_13_self_attn_fsmn_block_weight_to_fp16, x = input_377_cast_fp16)[name = tensor<string, []>("x_185_cast_fp16")];
tensor<int32, [3]> x_187_perm_0 = const()[name = tensor<string, []>("x_187_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_187_cast_fp16 = transpose(perm = x_187_perm_0, x = x_185_cast_fp16)[name = tensor<string, []>("transpose_96")];
tensor<fp16, [1, 128, 512]> input_379_cast_fp16 = add(x = x_187_cast_fp16, y = inputs_55_cast_fp16)[name = tensor<string, []>("input_379_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_381_cast_fp16 = mul(x = input_379_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_381_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_383_cast_fp16 = add(x = input_363_cast_fp16, y = input_381_cast_fp16)[name = tensor<string, []>("input_383_cast_fp16")];
tensor<int32, [1]> x_193_axes_0 = const()[name = tensor<string, []>("x_193_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_13_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86466944)))];
tensor<fp16, [512]> d_decoders_13_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86468032)))];
tensor<fp16, [1, 128, 512]> x_193_cast_fp16 = layer_norm(axes = x_193_axes_0, beta = d_decoders_13_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_13_norm3_weight_to_fp16, x = input_383_cast_fp16)[name = tensor<string, []>("x_193_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_13_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86469120)))];
tensor<fp16, [512]> d_decoders_13_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86993472)))];
tensor<fp16, [1, 128, 512]> linear_67_cast_fp16 = linear(bias = d_decoders_13_src_attn_linear_q_bias_to_fp16, weight = d_decoders_13_src_attn_linear_q_weight_to_fp16, x = x_193_cast_fp16)[name = tensor<string, []>("linear_67_cast_fp16")];
tensor<int32, [4]> var_1363 = const()[name = tensor<string, []>("op_1363"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_1364_cast_fp16 = reshape(shape = var_1363, x = linear_67_cast_fp16)[name = tensor<string, []>("op_1364_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_13_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86994560)))];
tensor<fp16, [1024]> d_decoders_13_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88043200)))];
tensor<fp16, [1, 512, 1024]> linear_68_cast_fp16 = linear(bias = d_decoders_13_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_13_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_68_cast_fp16")];
tensor<int32, [2]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_1369_axis_0 = const()[name = tensor<string, []>("op_1369_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_1369_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1369_cast_fp16_1 = split(axis = var_1369_axis_0, split_sizes = tile_13, x = linear_68_cast_fp16)[name = tensor<string, []>("op_1369_cast_fp16")];
tensor<int32, [4]> var_1372 = const()[name = tensor<string, []>("op_1372"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1373_cast_fp16 = reshape(shape = var_1372, x = var_1369_cast_fp16_0)[name = tensor<string, []>("op_1373_cast_fp16")];
tensor<int32, [4]> var_1375 = const()[name = tensor<string, []>("op_1375"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1376_cast_fp16 = reshape(shape = var_1375, x = var_1369_cast_fp16_1)[name = tensor<string, []>("op_1376_cast_fp16")];
tensor<int32, [4]> value_27_perm_0 = const()[name = tensor<string, []>("value_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_1378_to_fp16 = const()[name = tensor<string, []>("op_1378_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_55_cast_fp16 = mul(x = var_1364_cast_fp16, y = var_1378_to_fp16)[name = tensor<string, []>("q_h_55_cast_fp16")];
tensor<bool, []> scores_53_transpose_x_0 = const()[name = tensor<string, []>("scores_53_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_53_transpose_y_0 = const()[name = tensor<string, []>("scores_53_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_74_perm_0 = const()[name = tensor<string, []>("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_75_perm_0 = const()[name = tensor<string, []>("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_75 = transpose(perm = transpose_75_perm_0, x = var_1373_cast_fp16)[name = tensor<string, []>("transpose_93")];
tensor<fp16, [1, 4, 128, 128]> transpose_74 = transpose(perm = transpose_74_perm_0, x = q_h_55_cast_fp16)[name = tensor<string, []>("transpose_94")];
tensor<fp16, [1, 4, 128, 512]> scores_53_cast_fp16 = matmul(transpose_x = scores_53_transpose_x_0, transpose_y = scores_53_transpose_y_0, x = transpose_74, y = transpose_75)[name = tensor<string, []>("scores_53_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_55_cast_fp16 = select(a = var_8_to_fp16, b = scores_53_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_55_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1386_cast_fp16 = softmax(axis = var_20, x = scores_55_cast_fp16)[name = tensor<string, []>("op_1386_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_385_cast_fp16 = select(a = var_9_to_fp16, b = var_1386_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_385_cast_fp16")];
tensor<bool, []> x_195_transpose_x_0 = const()[name = tensor<string, []>("x_195_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_195_transpose_y_0 = const()[name = tensor<string, []>("x_195_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_27_cast_fp16 = transpose(perm = value_27_perm_0, x = var_1376_cast_fp16)[name = tensor<string, []>("transpose_95")];
tensor<fp16, [1, 4, 128, 128]> x_195_cast_fp16 = matmul(transpose_x = x_195_transpose_x_0, transpose_y = x_195_transpose_y_0, x = input_385_cast_fp16, y = value_27_cast_fp16)[name = tensor<string, []>("x_195_cast_fp16")];
tensor<int32, [4]> var_1390_perm_0 = const()[name = tensor<string, []>("op_1390_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1392 = const()[name = tensor<string, []>("op_1392"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1390_cast_fp16 = transpose(perm = var_1390_perm_0, x = x_195_cast_fp16)[name = tensor<string, []>("transpose_92")];
tensor<fp16, [1, 128, 512]> input_387_cast_fp16 = reshape(shape = var_1392, x = var_1390_cast_fp16)[name = tensor<string, []>("input_387_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_13_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88045312)))];
tensor<fp16, [512]> d_decoders_13_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_13_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88569664)))];
tensor<fp16, [1, 128, 512]> linear_69_cast_fp16 = linear(bias = d_decoders_13_src_attn_linear_out_bias_to_fp16, weight = d_decoders_13_src_attn_linear_out_weight_to_fp16, x = input_387_cast_fp16)[name = tensor<string, []>("linear_69_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_391_cast_fp16 = add(x = input_383_cast_fp16, y = linear_69_cast_fp16)[name = tensor<string, []>("input_391_cast_fp16")];
tensor<int32, [1]> input_393_axes_0 = const()[name = tensor<string, []>("input_393_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_14_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88570752)))];
tensor<fp16, [512]> d_decoders_14_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88571840)))];
tensor<fp16, [1, 128, 512]> input_393_cast_fp16 = layer_norm(axes = input_393_axes_0, beta = d_decoders_14_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_norm1_weight_to_fp16, x = input_391_cast_fp16)[name = tensor<string, []>("input_393_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_14_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88572928)))];
tensor<fp16, [2048]> d_decoders_14_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90670144)))];
tensor<fp16, [1, 128, 2048]> linear_70_cast_fp16 = linear(bias = d_decoders_14_feed_forward_w_1_bias_to_fp16, weight = d_decoders_14_feed_forward_w_1_weight_to_fp16, x = input_393_cast_fp16)[name = tensor<string, []>("linear_70_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_397_cast_fp16 = relu(x = linear_70_cast_fp16)[name = tensor<string, []>("input_397_cast_fp16")];
tensor<int32, [1]> input_401_axes_0 = const()[name = tensor<string, []>("input_401_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_14_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90674304)))];
tensor<fp16, [2048]> d_decoders_14_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90678464)))];
tensor<fp16, [1, 128, 2048]> input_401_cast_fp16 = layer_norm(axes = input_401_axes_0, beta = d_decoders_14_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_feed_forward_norm_weight_to_fp16, x = input_397_cast_fp16)[name = tensor<string, []>("input_401_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_14_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90682624)))];
tensor<fp16, [1, 128, 512]> linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_14_feed_forward_w_2_weight_to_fp16, x = input_401_cast_fp16)[name = tensor<string, []>("linear_71_cast_fp16")];
tensor<int32, [1]> inputs_57_axes_0 = const()[name = tensor<string, []>("inputs_57_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_14_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92779840)))];
tensor<fp16, [512]> d_decoders_14_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92780928)))];
tensor<fp16, [1, 128, 512]> inputs_57_cast_fp16 = layer_norm(axes = inputs_57_axes_0, beta = d_decoders_14_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_norm2_weight_to_fp16, x = linear_71_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_59_cast_fp16 = mul(x = inputs_57_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
tensor<int32, [3]> x_197_perm_0 = const()[name = tensor<string, []>("x_197_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_405_pad_0 = const()[name = tensor<string, []>("input_405_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_405_mode_0 = const()[name = tensor<string, []>("input_405_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_89_to_fp16 = const()[name = tensor<string, []>("const_89_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_197_cast_fp16 = transpose(perm = x_197_perm_0, x = inputs_59_cast_fp16)[name = tensor<string, []>("transpose_91")];
tensor<fp16, [1, 512, 138]> input_405_cast_fp16 = pad(constant_val = const_89_to_fp16, mode = input_405_mode_0, pad = input_405_pad_0, x = x_197_cast_fp16)[name = tensor<string, []>("input_405_cast_fp16")];
tensor<string, []> x_199_pad_type_0 = const()[name = tensor<string, []>("x_199_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_199_groups_0 = const()[name = tensor<string, []>("x_199_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_199_strides_0 = const()[name = tensor<string, []>("x_199_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_199_pad_0 = const()[name = tensor<string, []>("x_199_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_199_dilations_0 = const()[name = tensor<string, []>("x_199_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_14_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92782016)))];
tensor<fp16, [1, 512, 128]> x_199_cast_fp16 = conv(dilations = x_199_dilations_0, groups = x_199_groups_0, pad = x_199_pad_0, pad_type = x_199_pad_type_0, strides = x_199_strides_0, weight = d_decoders_14_self_attn_fsmn_block_weight_to_fp16, x = input_405_cast_fp16)[name = tensor<string, []>("x_199_cast_fp16")];
tensor<int32, [3]> x_201_perm_0 = const()[name = tensor<string, []>("x_201_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_201_cast_fp16 = transpose(perm = x_201_perm_0, x = x_199_cast_fp16)[name = tensor<string, []>("transpose_90")];
tensor<fp16, [1, 128, 512]> input_407_cast_fp16 = add(x = x_201_cast_fp16, y = inputs_59_cast_fp16)[name = tensor<string, []>("input_407_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_409_cast_fp16 = mul(x = input_407_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_409_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_411_cast_fp16 = add(x = input_391_cast_fp16, y = input_409_cast_fp16)[name = tensor<string, []>("input_411_cast_fp16")];
tensor<int32, [1]> x_207_axes_0 = const()[name = tensor<string, []>("x_207_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_14_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92793344)))];
tensor<fp16, [512]> d_decoders_14_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92794432)))];
tensor<fp16, [1, 128, 512]> x_207_cast_fp16 = layer_norm(axes = x_207_axes_0, beta = d_decoders_14_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_14_norm3_weight_to_fp16, x = input_411_cast_fp16)[name = tensor<string, []>("x_207_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_14_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92795520)))];
tensor<fp16, [512]> d_decoders_14_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93319872)))];
tensor<fp16, [1, 128, 512]> linear_72_cast_fp16 = linear(bias = d_decoders_14_src_attn_linear_q_bias_to_fp16, weight = d_decoders_14_src_attn_linear_q_weight_to_fp16, x = x_207_cast_fp16)[name = tensor<string, []>("linear_72_cast_fp16")];
tensor<int32, [4]> var_1458 = const()[name = tensor<string, []>("op_1458"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_1459_cast_fp16 = reshape(shape = var_1458, x = linear_72_cast_fp16)[name = tensor<string, []>("op_1459_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_14_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93320960)))];
tensor<fp16, [1024]> d_decoders_14_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94369600)))];
tensor<fp16, [1, 512, 1024]> linear_73_cast_fp16 = linear(bias = d_decoders_14_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_14_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_73_cast_fp16")];
tensor<int32, [2]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_1464_axis_0 = const()[name = tensor<string, []>("op_1464_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_1464_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1464_cast_fp16_1 = split(axis = var_1464_axis_0, split_sizes = tile_14, x = linear_73_cast_fp16)[name = tensor<string, []>("op_1464_cast_fp16")];
tensor<int32, [4]> var_1467 = const()[name = tensor<string, []>("op_1467"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1468_cast_fp16 = reshape(shape = var_1467, x = var_1464_cast_fp16_0)[name = tensor<string, []>("op_1468_cast_fp16")];
tensor<int32, [4]> var_1470 = const()[name = tensor<string, []>("op_1470"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1471_cast_fp16 = reshape(shape = var_1470, x = var_1464_cast_fp16_1)[name = tensor<string, []>("op_1471_cast_fp16")];
tensor<int32, [4]> value_29_perm_0 = const()[name = tensor<string, []>("value_29_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_1473_to_fp16 = const()[name = tensor<string, []>("op_1473_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_59_cast_fp16 = mul(x = var_1459_cast_fp16, y = var_1473_to_fp16)[name = tensor<string, []>("q_h_59_cast_fp16")];
tensor<bool, []> scores_57_transpose_x_0 = const()[name = tensor<string, []>("scores_57_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_57_transpose_y_0 = const()[name = tensor<string, []>("scores_57_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_76_perm_0 = const()[name = tensor<string, []>("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_77_perm_0 = const()[name = tensor<string, []>("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_77 = transpose(perm = transpose_77_perm_0, x = var_1468_cast_fp16)[name = tensor<string, []>("transpose_87")];
tensor<fp16, [1, 4, 128, 128]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_h_59_cast_fp16)[name = tensor<string, []>("transpose_88")];
tensor<fp16, [1, 4, 128, 512]> scores_57_cast_fp16 = matmul(transpose_x = scores_57_transpose_x_0, transpose_y = scores_57_transpose_y_0, x = transpose_76, y = transpose_77)[name = tensor<string, []>("scores_57_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_59_cast_fp16 = select(a = var_8_to_fp16, b = scores_57_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_59_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1481_cast_fp16 = softmax(axis = var_20, x = scores_59_cast_fp16)[name = tensor<string, []>("op_1481_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_413_cast_fp16 = select(a = var_9_to_fp16, b = var_1481_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_413_cast_fp16")];
tensor<bool, []> x_209_transpose_x_0 = const()[name = tensor<string, []>("x_209_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_209_transpose_y_0 = const()[name = tensor<string, []>("x_209_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_29_cast_fp16 = transpose(perm = value_29_perm_0, x = var_1471_cast_fp16)[name = tensor<string, []>("transpose_89")];
tensor<fp16, [1, 4, 128, 128]> x_209_cast_fp16 = matmul(transpose_x = x_209_transpose_x_0, transpose_y = x_209_transpose_y_0, x = input_413_cast_fp16, y = value_29_cast_fp16)[name = tensor<string, []>("x_209_cast_fp16")];
tensor<int32, [4]> var_1485_perm_0 = const()[name = tensor<string, []>("op_1485_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1487 = const()[name = tensor<string, []>("op_1487"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1485_cast_fp16 = transpose(perm = var_1485_perm_0, x = x_209_cast_fp16)[name = tensor<string, []>("transpose_86")];
tensor<fp16, [1, 128, 512]> input_415_cast_fp16 = reshape(shape = var_1487, x = var_1485_cast_fp16)[name = tensor<string, []>("input_415_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_14_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94371712)))];
tensor<fp16, [512]> d_decoders_14_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_14_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94896064)))];
tensor<fp16, [1, 128, 512]> linear_74_cast_fp16 = linear(bias = d_decoders_14_src_attn_linear_out_bias_to_fp16, weight = d_decoders_14_src_attn_linear_out_weight_to_fp16, x = input_415_cast_fp16)[name = tensor<string, []>("linear_74_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_419_cast_fp16 = add(x = input_411_cast_fp16, y = linear_74_cast_fp16)[name = tensor<string, []>("input_419_cast_fp16")];
tensor<int32, [1]> input_421_axes_0 = const()[name = tensor<string, []>("input_421_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_15_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94897152)))];
tensor<fp16, [512]> d_decoders_15_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94898240)))];
tensor<fp16, [1, 128, 512]> input_421_cast_fp16 = layer_norm(axes = input_421_axes_0, beta = d_decoders_15_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_norm1_weight_to_fp16, x = input_419_cast_fp16)[name = tensor<string, []>("input_421_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders_15_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94899328)))];
tensor<fp16, [2048]> d_decoders_15_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96996544)))];
tensor<fp16, [1, 128, 2048]> linear_75_cast_fp16 = linear(bias = d_decoders_15_feed_forward_w_1_bias_to_fp16, weight = d_decoders_15_feed_forward_w_1_weight_to_fp16, x = input_421_cast_fp16)[name = tensor<string, []>("linear_75_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_425_cast_fp16 = relu(x = linear_75_cast_fp16)[name = tensor<string, []>("input_425_cast_fp16")];
tensor<int32, [1]> input_429_axes_0 = const()[name = tensor<string, []>("input_429_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders_15_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97000704)))];
tensor<fp16, [2048]> d_decoders_15_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97004864)))];
tensor<fp16, [1, 128, 2048]> input_429_cast_fp16 = layer_norm(axes = input_429_axes_0, beta = d_decoders_15_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_feed_forward_norm_weight_to_fp16, x = input_425_cast_fp16)[name = tensor<string, []>("input_429_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders_15_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97009024)))];
tensor<fp16, [1, 128, 512]> linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders_15_feed_forward_w_2_weight_to_fp16, x = input_429_cast_fp16)[name = tensor<string, []>("linear_76_cast_fp16")];
tensor<int32, [1]> inputs_61_axes_0 = const()[name = tensor<string, []>("inputs_61_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_15_norm2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm2_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99106240)))];
tensor<fp16, [512]> d_decoders_15_norm2_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99107328)))];
tensor<fp16, [1, 128, 512]> inputs_61_cast_fp16 = layer_norm(axes = inputs_61_axes_0, beta = d_decoders_15_norm2_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_norm2_weight_to_fp16, x = linear_76_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
tensor<fp16, [1, 128, 512]> inputs_cast_fp16 = mul(x = inputs_61_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
tensor<int32, [3]> x_211_perm_0 = const()[name = tensor<string, []>("x_211_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [6]> input_433_pad_0 = const()[name = tensor<string, []>("input_433_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 5, 5])];
tensor<string, []> input_433_mode_0 = const()[name = tensor<string, []>("input_433_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_95_to_fp16 = const()[name = tensor<string, []>("const_95_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 512, 128]> x_211_cast_fp16 = transpose(perm = x_211_perm_0, x = inputs_cast_fp16)[name = tensor<string, []>("transpose_85")];
tensor<fp16, [1, 512, 138]> input_433_cast_fp16 = pad(constant_val = const_95_to_fp16, mode = input_433_mode_0, pad = input_433_pad_0, x = x_211_cast_fp16)[name = tensor<string, []>("input_433_cast_fp16")];
tensor<string, []> x_213_pad_type_0 = const()[name = tensor<string, []>("x_213_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, []> x_213_groups_0 = const()[name = tensor<string, []>("x_213_groups_0"), val = tensor<int32, []>(512)];
tensor<int32, [1]> x_213_strides_0 = const()[name = tensor<string, []>("x_213_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_213_pad_0 = const()[name = tensor<string, []>("x_213_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_213_dilations_0 = const()[name = tensor<string, []>("x_213_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [512, 1, 11]> d_decoders_15_self_attn_fsmn_block_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_self_attn_fsmn_block_weight_to_fp16"), val = tensor<fp16, [512, 1, 11]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99108416)))];
tensor<fp16, [1, 512, 128]> x_213_cast_fp16 = conv(dilations = x_213_dilations_0, groups = x_213_groups_0, pad = x_213_pad_0, pad_type = x_213_pad_type_0, strides = x_213_strides_0, weight = d_decoders_15_self_attn_fsmn_block_weight_to_fp16, x = input_433_cast_fp16)[name = tensor<string, []>("x_213_cast_fp16")];
tensor<int32, [3]> x_215_perm_0 = const()[name = tensor<string, []>("x_215_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 128, 512]> x_215_cast_fp16 = transpose(perm = x_215_perm_0, x = x_213_cast_fp16)[name = tensor<string, []>("transpose_84")];
tensor<fp16, [1, 128, 512]> input_435_cast_fp16 = add(x = x_215_cast_fp16, y = inputs_cast_fp16)[name = tensor<string, []>("input_435_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_437_cast_fp16 = mul(x = input_435_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("input_437_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_439_cast_fp16 = add(x = input_419_cast_fp16, y = input_437_cast_fp16)[name = tensor<string, []>("input_439_cast_fp16")];
tensor<int32, [1]> x_221_axes_0 = const()[name = tensor<string, []>("x_221_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders_15_norm3_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm3_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99119744)))];
tensor<fp16, [512]> d_decoders_15_norm3_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_norm3_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99120832)))];
tensor<fp16, [1, 128, 512]> x_221_cast_fp16 = layer_norm(axes = x_221_axes_0, beta = d_decoders_15_norm3_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders_15_norm3_weight_to_fp16, x = input_439_cast_fp16)[name = tensor<string, []>("x_221_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_15_src_attn_linear_q_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_q_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99121920)))];
tensor<fp16, [512]> d_decoders_15_src_attn_linear_q_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_q_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99646272)))];
tensor<fp16, [1, 128, 512]> linear_77_cast_fp16 = linear(bias = d_decoders_15_src_attn_linear_q_bias_to_fp16, weight = d_decoders_15_src_attn_linear_q_weight_to_fp16, x = x_221_cast_fp16)[name = tensor<string, []>("linear_77_cast_fp16")];
tensor<int32, [4]> var_1553 = const()[name = tensor<string, []>("op_1553"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 128, 4, 128]> var_1554_cast_fp16 = reshape(shape = var_1553, x = linear_77_cast_fp16)[name = tensor<string, []>("op_1554_cast_fp16")];
tensor<fp16, [1024, 512]> d_decoders_15_src_attn_linear_k_v_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_k_v_weight_to_fp16"), val = tensor<fp16, [1024, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99647360)))];
tensor<fp16, [1024]> d_decoders_15_src_attn_linear_k_v_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_k_v_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100696000)))];
tensor<fp16, [1, 512, 1024]> linear_78_cast_fp16 = linear(bias = d_decoders_15_src_attn_linear_k_v_bias_to_fp16, weight = d_decoders_15_src_attn_linear_k_v_weight_to_fp16, x = enc_to_fp16)[name = tensor<string, []>("linear_78_cast_fp16")];
tensor<int32, [2]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [2]>([512, 512])];
tensor<int32, []> var_1559_axis_0 = const()[name = tensor<string, []>("op_1559_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 512, 512]> var_1559_cast_fp16_0, tensor<fp16, [1, 512, 512]> var_1559_cast_fp16_1 = split(axis = var_1559_axis_0, split_sizes = tile_15, x = linear_78_cast_fp16)[name = tensor<string, []>("op_1559_cast_fp16")];
tensor<int32, [4]> var_1562 = const()[name = tensor<string, []>("op_1562"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1563_cast_fp16 = reshape(shape = var_1562, x = var_1559_cast_fp16_0)[name = tensor<string, []>("op_1563_cast_fp16")];
tensor<int32, [4]> var_1565 = const()[name = tensor<string, []>("op_1565"), val = tensor<int32, [4]>([1, -1, 4, 128])];
tensor<fp16, [1, 512, 4, 128]> var_1566_cast_fp16 = reshape(shape = var_1565, x = var_1559_cast_fp16_1)[name = tensor<string, []>("op_1566_cast_fp16")];
tensor<int32, [4]> value_perm_0 = const()[name = tensor<string, []>("value_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> var_1568_to_fp16 = const()[name = tensor<string, []>("op_1568_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 128, 4, 128]> q_h_cast_fp16 = mul(x = var_1554_cast_fp16, y = var_1568_to_fp16)[name = tensor<string, []>("q_h_cast_fp16")];
tensor<bool, []> scores_61_transpose_x_0 = const()[name = tensor<string, []>("scores_61_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> scores_61_transpose_y_0 = const()[name = tensor<string, []>("scores_61_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_78_perm_0 = const()[name = tensor<string, []>("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_79_perm_0 = const()[name = tensor<string, []>("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 4, 128, 512]> transpose_79 = transpose(perm = transpose_79_perm_0, x = var_1563_cast_fp16)[name = tensor<string, []>("transpose_81")];
tensor<fp16, [1, 4, 128, 128]> transpose_78 = transpose(perm = transpose_78_perm_0, x = q_h_cast_fp16)[name = tensor<string, []>("transpose_82")];
tensor<fp16, [1, 4, 128, 512]> scores_61_cast_fp16 = matmul(transpose_x = scores_61_transpose_x_0, transpose_y = scores_61_transpose_y_0, x = transpose_78, y = transpose_79)[name = tensor<string, []>("scores_61_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> scores_cast_fp16 = select(a = var_8_to_fp16, b = scores_61_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("scores_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> var_1576_cast_fp16 = softmax(axis = var_20, x = scores_cast_fp16)[name = tensor<string, []>("op_1576_cast_fp16")];
tensor<fp16, [1, 4, 128, 512]> input_441_cast_fp16 = select(a = var_9_to_fp16, b = var_1576_cast_fp16, cond = mask_15_cast_fp16)[name = tensor<string, []>("input_441_cast_fp16")];
tensor<bool, []> x_transpose_x_0 = const()[name = tensor<string, []>("x_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> x_transpose_y_0 = const()[name = tensor<string, []>("x_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 4, 512, 128]> value_cast_fp16 = transpose(perm = value_perm_0, x = var_1566_cast_fp16)[name = tensor<string, []>("transpose_83")];
tensor<fp16, [1, 4, 128, 128]> x_cast_fp16 = matmul(transpose_x = x_transpose_x_0, transpose_y = x_transpose_y_0, x = input_441_cast_fp16, y = value_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
tensor<int32, [4]> var_1580_perm_0 = const()[name = tensor<string, []>("op_1580_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1582 = const()[name = tensor<string, []>("op_1582"), val = tensor<int32, [3]>([1, -1, 512])];
tensor<fp16, [1, 128, 4, 128]> var_1580_cast_fp16 = transpose(perm = var_1580_perm_0, x = x_cast_fp16)[name = tensor<string, []>("transpose_80")];
tensor<fp16, [1, 128, 512]> input_443_cast_fp16 = reshape(shape = var_1582, x = var_1580_cast_fp16)[name = tensor<string, []>("input_443_cast_fp16")];
tensor<fp16, [512, 512]> d_decoders_15_src_attn_linear_out_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_out_weight_to_fp16"), val = tensor<fp16, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100698112)))];
tensor<fp16, [512]> d_decoders_15_src_attn_linear_out_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders_15_src_attn_linear_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101222464)))];
tensor<fp16, [1, 128, 512]> linear_79_cast_fp16 = linear(bias = d_decoders_15_src_attn_linear_out_bias_to_fp16, weight = d_decoders_15_src_attn_linear_out_weight_to_fp16, x = input_443_cast_fp16)[name = tensor<string, []>("linear_79_cast_fp16")];
tensor<fp16, [1, 128, 512]> input_447_cast_fp16 = add(x = input_439_cast_fp16, y = linear_79_cast_fp16)[name = tensor<string, []>("input_447_cast_fp16")];
tensor<int32, [1]> input_449_axes_0 = const()[name = tensor<string, []>("input_449_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_decoders3_0_norm1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_norm1_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101223552)))];
tensor<fp16, [512]> d_decoders3_0_norm1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_norm1_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101224640)))];
tensor<fp16, [1, 128, 512]> input_449_cast_fp16 = layer_norm(axes = input_449_axes_0, beta = d_decoders3_0_norm1_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders3_0_norm1_weight_to_fp16, x = input_447_cast_fp16)[name = tensor<string, []>("input_449_cast_fp16")];
tensor<fp16, [2048, 512]> d_decoders3_0_feed_forward_w_1_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_w_1_weight_to_fp16"), val = tensor<fp16, [2048, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101225728)))];
tensor<fp16, [2048]> d_decoders3_0_feed_forward_w_1_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_w_1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103322944)))];
tensor<fp16, [1, 128, 2048]> linear_80_cast_fp16 = linear(bias = d_decoders3_0_feed_forward_w_1_bias_to_fp16, weight = d_decoders3_0_feed_forward_w_1_weight_to_fp16, x = input_449_cast_fp16)[name = tensor<string, []>("linear_80_cast_fp16")];
tensor<fp16, [1, 128, 2048]> input_453_cast_fp16 = relu(x = linear_80_cast_fp16)[name = tensor<string, []>("input_453_cast_fp16")];
tensor<int32, [1]> input_457_axes_0 = const()[name = tensor<string, []>("input_457_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [2048]> d_decoders3_0_feed_forward_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_norm_weight_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103327104)))];
tensor<fp16, [2048]> d_decoders3_0_feed_forward_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_norm_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103331264)))];
tensor<fp16, [1, 128, 2048]> input_457_cast_fp16 = layer_norm(axes = input_457_axes_0, beta = d_decoders3_0_feed_forward_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_decoders3_0_feed_forward_norm_weight_to_fp16, x = input_453_cast_fp16)[name = tensor<string, []>("input_457_cast_fp16")];
tensor<fp16, [512, 2048]> d_decoders3_0_feed_forward_w_2_weight_to_fp16 = const()[name = tensor<string, []>("d_decoders3_0_feed_forward_w_2_weight_to_fp16"), val = tensor<fp16, [512, 2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103335424)))];
tensor<fp16, [1, 128, 512]> linear_81_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = d_decoders3_0_feed_forward_w_2_weight_to_fp16, x = input_457_cast_fp16)[name = tensor<string, []>("linear_81_cast_fp16")];
tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [512]> d_after_norm_weight_to_fp16 = const()[name = tensor<string, []>("d_after_norm_weight_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105432640)))];
tensor<fp16, [512]> d_after_norm_bias_to_fp16 = const()[name = tensor<string, []>("d_after_norm_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105433728)))];
tensor<fp16, [1, 128, 512]> input_cast_fp16 = layer_norm(axes = input_axes_0, beta = d_after_norm_bias_to_fp16, epsilon = var_15_to_fp16, gamma = d_after_norm_weight_to_fp16, x = linear_81_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
tensor<fp16, [8404, 512]> d_output_layer_weight_to_fp16 = const()[name = tensor<string, []>("d_output_layer_weight_to_fp16"), val = tensor<fp16, [8404, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105434816)))];
tensor<fp16, [8404]> d_output_layer_bias_to_fp16 = const()[name = tensor<string, []>("d_output_layer_bias_to_fp16"), val = tensor<fp16, [8404]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114040576)))];
tensor<fp16, [1, 128, 8404]> logits = linear(bias = d_output_layer_bias_to_fp16, weight = d_output_layer_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("linear_82_cast_fp16")];
} -> (logits);
}