kamilobad's picture
Duplicate from aufklarer/Magpie-TTS-Multilingual-357M-CoreML-8bit
ca600aa
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})]
{
func main<ios17>(tensor<fp32, [1, 256]> mask, tensor<int32, [1, 256]> tokens) {
tensor<int32, []> x_1_batch_dims_0 = const()[name = tensor<string, []>("x_1_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<bool, []> x_1_validate_indices_0 = const()[name = tensor<string, []>("x_1_validate_indices_0"), val = tensor<bool, []>(false)];
tensor<fp16, [2362, 768]> text_embedding_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("text_embedding_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2362, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), scale = tensor<fp16, [2362]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1816576))), zero_point = tensor<int8, [2362]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1814144)))];
tensor<string, []> tokens_to_int16_dtype_0 = const()[name = tensor<string, []>("tokens_to_int16_dtype_0"), val = tensor<string, []>("int16")];
tensor<string, []> cast_14_dtype_0 = const()[name = tensor<string, []>("cast_14_dtype_0"), val = tensor<string, []>("int32")];
tensor<int32, []> greater_equal_0_y_0 = const()[name = tensor<string, []>("greater_equal_0_y_0"), val = tensor<int32, []>(0)];
tensor<int16, [1, 256]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = tensor<string, []>("cast_6")];
tensor<int32, [1, 256]> cast_14 = cast(dtype = cast_14_dtype_0, x = tokens_to_int16)[name = tensor<string, []>("cast_5")];
tensor<bool, [1, 256]> greater_equal_0 = greater_equal(x = cast_14, y = greater_equal_0_y_0)[name = tensor<string, []>("greater_equal_0")];
tensor<int32, []> slice_by_index_0 = const()[name = tensor<string, []>("slice_by_index_0"), val = tensor<int32, []>(2362)];
tensor<int32, [1, 256]> add_0 = add(x = cast_14, y = slice_by_index_0)[name = tensor<string, []>("add_0")];
tensor<int32, [1, 256]> select_0 = select(a = cast_14, b = add_0, cond = greater_equal_0)[name = tensor<string, []>("select_0")];
tensor<string, []> select_0_to_int16_dtype_0 = const()[name = tensor<string, []>("select_0_to_int16_dtype_0"), val = tensor<string, []>("int16")];
tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("int32")];
tensor<int32, []> greater_equal_0_y_0_1 = const()[name = tensor<string, []>("greater_equal_0_y_0_1"), val = tensor<int32, []>(0)];
tensor<int16, [1, 256]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor<string, []>("cast_4")];
tensor<int32, [1, 256]> cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor<string, []>("cast_3")];
tensor<bool, [1, 256]> greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor<string, []>("greater_equal_0_1")];
tensor<int32, []> slice_by_index_0_1 = const()[name = tensor<string, []>("slice_by_index_0_1"), val = tensor<int32, []>(2362)];
tensor<int32, [1, 256]> add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor<string, []>("add_0_1")];
tensor<int32, [1, 256]> select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor<string, []>("select_0_1")];
tensor<int32, []> x_1_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor<string, []>("x_1_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor<int32, []>(0)];
tensor<fp16, [1, 256, 768]> x_1_cast_fp16_cast_uint16_cast_uint16 = gather(axis = x_1_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = x_1_batch_dims_0, indices = select_0_1, validate_indices = x_1_validate_indices_0, x = text_embedding_weight_to_fp16_quantized)[name = tensor<string, []>("x_1_cast_fp16_cast_uint16_cast_uint16")];
tensor<int32, []> var_26 = const()[name = tensor<string, []>("op_26"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 256, 768]> op_50_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("op_50_to_fp16_quantized"), quantized_data = tensor<int8, [1, 256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1821376))), scale = tensor<fp16, []>(0x1.544p-6), zero_point = tensor<int8, []>(0)];
tensor<fp16, [1, 256, 768]> input_5_cast_fp16 = add(x = x_1_cast_fp16_cast_uint16_cast_uint16, y = op_50_to_fp16_quantized)[name = tensor<string, []>("input_5_cast_fp16")];
tensor<int32, [1]> var_57_axes_0 = const()[name = tensor<string, []>("op_57_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<string, []> mask_to_fp16_dtype_0 = const()[name = tensor<string, []>("mask_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [1, 256]> mask_to_fp16 = cast(dtype = mask_to_fp16_dtype_0, x = mask)[name = tensor<string, []>("cast_2")];
tensor<fp16, [1, 256, 1]> var_57_cast_fp16 = expand_dims(axes = var_57_axes_0, x = mask_to_fp16)[name = tensor<string, []>("op_57_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_7_cast_fp16 = mul(x = input_5_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
tensor<int32, [1]> query_1_axes_0 = const()[name = tensor<string, []>("query_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_0_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_0_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2018048)))];
tensor<fp16, []> var_15_to_fp16 = const()[name = tensor<string, []>("op_15_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 256, 768]> query_1_cast_fp16 = layer_norm(axes = query_1_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_0_norm_self_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
tensor<fp16, [2304, 768]> encoder_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2019648))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3791552))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3789184)))];
tensor<fp16, [2304]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3796224)))];
tensor<fp16, [1, 256, 2304]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
tensor<int32, [5]> var_69 = const()[name = tensor<string, []>("op_69"), val = tensor<int32, [5]>([1, 256, 3, 12, 64])];
tensor<fp16, [1, 256, 3, 12, 64]> qkv_1_cast_fp16 = reshape(shape = var_69, x = linear_0_cast_fp16)[name = tensor<string, []>("qkv_1_cast_fp16")];
tensor<int32, [3]> var_71_split_sizes_0 = const()[name = tensor<string, []>("op_71_split_sizes_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<int32, []> var_71_axis_0 = const()[name = tensor<string, []>("op_71_axis_0"), val = tensor<int32, []>(2)];
tensor<fp16, [1, 256, 1, 12, 64]> var_71_cast_fp16_0, tensor<fp16, [1, 256, 1, 12, 64]> var_71_cast_fp16_1, tensor<fp16, [1, 256, 1, 12, 64]> var_71_cast_fp16_2 = split(axis = var_71_axis_0, split_sizes = var_71_split_sizes_0, x = qkv_1_cast_fp16)[name = tensor<string, []>("op_71_cast_fp16")];
tensor<int32, [1]> q_3_axes_0 = const()[name = tensor<string, []>("q_3_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> q_3_cast_fp16 = squeeze(axes = q_3_axes_0, x = var_71_cast_fp16_0)[name = tensor<string, []>("q_3_cast_fp16")];
tensor<int32, [1]> k_3_axes_0 = const()[name = tensor<string, []>("k_3_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> k_3_cast_fp16 = squeeze(axes = k_3_axes_0, x = var_71_cast_fp16_1)[name = tensor<string, []>("k_3_cast_fp16")];
tensor<int32, [1]> v_3_axes_0 = const()[name = tensor<string, []>("v_3_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> v_3_cast_fp16 = squeeze(axes = v_3_axes_0, x = var_71_cast_fp16_2)[name = tensor<string, []>("v_3_cast_fp16")];
tensor<int32, [1]> var_77_axes_0 = const()[name = tensor<string, []>("op_77_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [1, 1, 256]> var_77_cast_fp16 = expand_dims(axes = var_77_axes_0, x = mask_to_fp16)[name = tensor<string, []>("op_77_cast_fp16")];
tensor<int32, [1]> var_78_axes_0 = const()[name = tensor<string, []>("op_78_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 1]> var_78_cast_fp16 = expand_dims(axes = var_78_axes_0, x = mask_to_fp16)[name = tensor<string, []>("op_78_cast_fp16")];
tensor<fp16, [1, 256, 256]> mask_3_cast_fp16 = mul(x = var_77_cast_fp16, y = var_78_cast_fp16)[name = tensor<string, []>("mask_3_cast_fp16")];
tensor<int32, [1]> mask_5_axes_0 = const()[name = tensor<string, []>("mask_5_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [1, 1, 256, 256]> mask_5_cast_fp16 = expand_dims(axes = mask_5_axes_0, x = mask_3_cast_fp16)[name = tensor<string, []>("mask_5_cast_fp16")];
tensor<int32, [4]> v_5_perm_0 = const()[name = tensor<string, []>("v_5_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<bool, []> var_87_transpose_x_0 = const()[name = tensor<string, []>("op_87_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_87_transpose_y_0 = const()[name = tensor<string, []>("op_87_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_24_perm_0 = const()[name = tensor<string, []>("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_25_perm_0 = const()[name = tensor<string, []>("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 256]> transpose_25 = transpose(perm = transpose_25_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_70")];
tensor<fp16, [1, 12, 256, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = q_3_cast_fp16)[name = tensor<string, []>("transpose_71")];
tensor<fp16, [1, 12, 256, 256]> var_87_cast_fp16 = matmul(transpose_x = var_87_transpose_x_0, transpose_y = var_87_transpose_y_0, x = transpose_24, y = transpose_25)[name = tensor<string, []>("op_87_cast_fp16")];
tensor<fp16, []> var_88_to_fp16 = const()[name = tensor<string, []>("op_88_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 256, 256]> attn_score_1_cast_fp16 = mul(x = var_87_cast_fp16, y = var_88_to_fp16)[name = tensor<string, []>("attn_score_1_cast_fp16")];
tensor<fp16, []> var_27_promoted_to_fp16 = const()[name = tensor<string, []>("op_27_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<bool, [1, 1, 256, 256]> var_90_cast_fp16 = equal(x = mask_5_cast_fp16, y = var_27_promoted_to_fp16)[name = tensor<string, []>("op_90_cast_fp16")];
tensor<fp16, []> var_18_to_fp16 = const()[name = tensor<string, []>("op_18_to_fp16"), val = tensor<fp16, []>(-inf)];
tensor<fp16, [1, 12, 256, 256]> attn_score_3_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_1_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("attn_score_3_cast_fp16")];
tensor<fp16, [1, 1, 256, 256]> input_9_cast_fp16_x_0 = const()[name = tensor<string, []>("input_9_cast_fp16_x_0"), val = tensor<fp16, [1, 1, 256, 256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3800896)))];
tensor<fp16, [1, 12, 256, 256]> input_9_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_3_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_prob_1_cast_fp16 = softmax(axis = var_26, x = input_9_cast_fp16)[name = tensor<string, []>("attn_prob_1_cast_fp16")];
tensor<fp16, []> var_17_to_fp16 = const()[name = tensor<string, []>("op_17_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 12, 256, 256]> input_11_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_1_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
tensor<bool, []> y_1_transpose_x_0 = const()[name = tensor<string, []>("y_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> y_1_transpose_y_0 = const()[name = tensor<string, []>("y_1_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 256, 64]> v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = v_3_cast_fp16)[name = tensor<string, []>("transpose_69")];
tensor<fp16, [1, 12, 256, 64]> y_1_cast_fp16 = matmul(transpose_x = y_1_transpose_x_0, transpose_y = y_1_transpose_y_0, x = input_11_cast_fp16, y = v_5_cast_fp16)[name = tensor<string, []>("y_1_cast_fp16")];
tensor<int32, [4]> var_101_perm_0 = const()[name = tensor<string, []>("op_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_103 = const()[name = tensor<string, []>("op_103"), val = tensor<int32, [3]>([1, 256, -1])];
tensor<fp16, [1, 256, 12, 64]> var_101_cast_fp16 = transpose(perm = var_101_perm_0, x = y_1_cast_fp16)[name = tensor<string, []>("transpose_68")];
tensor<fp16, [1, 256, 768]> input_13_cast_fp16 = reshape(shape = var_103, x = var_101_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
tensor<fp16, [768, 768]> encoder_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3932032))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4522752))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4524352)))];
tensor<fp16, [1, 256, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_17_cast_fp16 = add(x = input_7_cast_fp16, y = linear_1_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
tensor<int32, [1]> x_5_axes_0 = const()[name = tensor<string, []>("x_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4525952)))];
tensor<fp16, [1, 256, 768]> x_5_cast_fp16 = layer_norm(axes = x_5_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_0_norm_pos_ff_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
tensor<int32, [3]> signal_1_perm_0 = const()[name = tensor<string, []>("signal_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 768, 256]> signal_1_cast_fp16 = transpose(perm = signal_1_perm_0, x = x_5_cast_fp16)[name = tensor<string, []>("transpose_67")];
tensor<fp16, [1, 768, 256]> input_19_cast_fp16 = mul(x = signal_1_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
tensor<int32, [6]> input_21_pad_0 = const()[name = tensor<string, []>("input_21_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_7_to_fp16 = const()[name = tensor<string, []>("const_7_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 768, 258]> input_21_cast_fp16 = pad(constant_val = const_7_to_fp16, mode = input_21_mode_0, pad = input_21_pad_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
tensor<string, []> conv_signal_1_pad_type_0 = const()[name = tensor<string, []>("conv_signal_1_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_1_strides_0 = const()[name = tensor<string, []>("conv_signal_1_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_1_pad_0 = const()[name = tensor<string, []>("conv_signal_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_1_dilations_0 = const()[name = tensor<string, []>("conv_signal_1_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_1_groups_0 = const()[name = tensor<string, []>("conv_signal_1_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 3]> encoder_layers_0_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_0_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4527552))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11608640))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11605504)))];
tensor<fp16, [1, 3072, 256]> conv_signal_1_cast_fp16 = conv(dilations = conv_signal_1_dilations_0, groups = conv_signal_1_groups_0, pad = conv_signal_1_pad_0, pad_type = conv_signal_1_pad_type_0, strides = conv_signal_1_strides_0, weight = encoder_layers_0_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor<string, []>("conv_signal_1_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_23_cast_fp16 = mul(x = conv_signal_1_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
tensor<string, []> signal_3_mode_0 = const()[name = tensor<string, []>("signal_3_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 256]> signal_3_cast_fp16 = gelu(mode = signal_3_mode_0, x = input_23_cast_fp16)[name = tensor<string, []>("signal_3_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_25_cast_fp16 = mul(x = signal_3_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
tensor<int32, [6]> input_27_pad_0 = const()[name = tensor<string, []>("input_27_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_27_mode_0 = const()[name = tensor<string, []>("input_27_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_8_to_fp16 = const()[name = tensor<string, []>("const_8_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 3072, 258]> input_27_cast_fp16 = pad(constant_val = const_8_to_fp16, mode = input_27_mode_0, pad = input_27_pad_0, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
tensor<string, []> conv_signal_3_pad_type_0 = const()[name = tensor<string, []>("conv_signal_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_3_strides_0 = const()[name = tensor<string, []>("conv_signal_3_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_3_pad_0 = const()[name = tensor<string, []>("conv_signal_3_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_3_dilations_0 = const()[name = tensor<string, []>("conv_signal_3_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_3_groups_0 = const()[name = tensor<string, []>("conv_signal_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 3]> encoder_layers_0_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_0_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11614848))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18692800))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 768, 256]> conv_signal_3_cast_fp16 = conv(dilations = conv_signal_3_dilations_0, groups = conv_signal_3_groups_0, pad = conv_signal_3_pad_0, pad_type = conv_signal_3_pad_type_0, strides = conv_signal_3_strides_0, weight = encoder_layers_0_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor<string, []>("conv_signal_3_cast_fp16")];
tensor<fp16, [1, 768, 256]> var_141_cast_fp16 = mul(x = conv_signal_3_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("op_141_cast_fp16")];
tensor<int32, [3]> input_29_perm_0 = const()[name = tensor<string, []>("input_29_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 256, 768]> input_29_cast_fp16 = transpose(perm = input_29_perm_0, x = var_141_cast_fp16)[name = tensor<string, []>("transpose_66")];
tensor<fp16, [1, 256, 768]> x_7_cast_fp16 = add(x = input_17_cast_fp16, y = input_29_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
tensor<fp16, [1, 256, 768]> x_9_cast_fp16 = mul(x = x_7_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_31_cast_fp16 = mul(x = x_9_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
tensor<int32, [1]> query_3_axes_0 = const()[name = tensor<string, []>("query_3_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_1_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_1_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18694400)))];
tensor<fp16, [1, 256, 768]> query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_1_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
tensor<fp16, [2304, 768]> encoder_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18696000))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20465536))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3789184)))];
tensor<fp16, [1, 256, 2304]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
tensor<int32, [5]> var_163 = const()[name = tensor<string, []>("op_163"), val = tensor<int32, [5]>([1, 256, 3, 12, 64])];
tensor<fp16, [1, 256, 3, 12, 64]> qkv_3_cast_fp16 = reshape(shape = var_163, x = linear_2_cast_fp16)[name = tensor<string, []>("qkv_3_cast_fp16")];
tensor<int32, [3]> var_165_split_sizes_0 = const()[name = tensor<string, []>("op_165_split_sizes_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<int32, []> var_165_axis_0 = const()[name = tensor<string, []>("op_165_axis_0"), val = tensor<int32, []>(2)];
tensor<fp16, [1, 256, 1, 12, 64]> var_165_cast_fp16_0, tensor<fp16, [1, 256, 1, 12, 64]> var_165_cast_fp16_1, tensor<fp16, [1, 256, 1, 12, 64]> var_165_cast_fp16_2 = split(axis = var_165_axis_0, split_sizes = var_165_split_sizes_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("op_165_cast_fp16")];
tensor<int32, [1]> q_9_axes_0 = const()[name = tensor<string, []>("q_9_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> q_9_cast_fp16 = squeeze(axes = q_9_axes_0, x = var_165_cast_fp16_0)[name = tensor<string, []>("q_9_cast_fp16")];
tensor<int32, [1]> k_9_axes_0 = const()[name = tensor<string, []>("k_9_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> k_9_cast_fp16 = squeeze(axes = k_9_axes_0, x = var_165_cast_fp16_1)[name = tensor<string, []>("k_9_cast_fp16")];
tensor<int32, [1]> v_9_axes_0 = const()[name = tensor<string, []>("v_9_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> v_9_cast_fp16 = squeeze(axes = v_9_axes_0, x = var_165_cast_fp16_2)[name = tensor<string, []>("v_9_cast_fp16")];
tensor<int32, [4]> v_11_perm_0 = const()[name = tensor<string, []>("v_11_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<bool, []> var_181_transpose_x_0 = const()[name = tensor<string, []>("op_181_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_181_transpose_y_0 = const()[name = tensor<string, []>("op_181_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_26_perm_0 = const()[name = tensor<string, []>("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_27_perm_0 = const()[name = tensor<string, []>("transpose_27_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 256]> transpose_27 = transpose(perm = transpose_27_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_64")];
tensor<fp16, [1, 12, 256, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = q_9_cast_fp16)[name = tensor<string, []>("transpose_65")];
tensor<fp16, [1, 12, 256, 256]> var_181_cast_fp16 = matmul(transpose_x = var_181_transpose_x_0, transpose_y = var_181_transpose_y_0, x = transpose_26, y = transpose_27)[name = tensor<string, []>("op_181_cast_fp16")];
tensor<fp16, []> var_182_to_fp16 = const()[name = tensor<string, []>("op_182_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 256, 256]> attn_score_5_cast_fp16 = mul(x = var_181_cast_fp16, y = var_182_to_fp16)[name = tensor<string, []>("attn_score_5_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_score_7_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_5_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("attn_score_7_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_33_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_7_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_prob_5_cast_fp16 = softmax(axis = var_26, x = input_33_cast_fp16)[name = tensor<string, []>("attn_prob_5_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_35_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_5_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
tensor<bool, []> y_3_transpose_x_0 = const()[name = tensor<string, []>("y_3_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> y_3_transpose_y_0 = const()[name = tensor<string, []>("y_3_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 256, 64]> v_11_cast_fp16 = transpose(perm = v_11_perm_0, x = v_9_cast_fp16)[name = tensor<string, []>("transpose_63")];
tensor<fp16, [1, 12, 256, 64]> y_3_cast_fp16 = matmul(transpose_x = y_3_transpose_x_0, transpose_y = y_3_transpose_y_0, x = input_35_cast_fp16, y = v_11_cast_fp16)[name = tensor<string, []>("y_3_cast_fp16")];
tensor<int32, [4]> var_195_perm_0 = const()[name = tensor<string, []>("op_195_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_197 = const()[name = tensor<string, []>("op_197"), val = tensor<int32, [3]>([1, 256, -1])];
tensor<fp16, [1, 256, 12, 64]> var_195_cast_fp16 = transpose(perm = var_195_perm_0, x = y_3_cast_fp16)[name = tensor<string, []>("transpose_62")];
tensor<fp16, [1, 256, 768]> input_37_cast_fp16 = reshape(shape = var_197, x = var_195_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
tensor<fp16, [768, 768]> encoder_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_1_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20470208))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21060096))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 256, 768]> linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_37_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_41_cast_fp16 = add(x = input_31_cast_fp16, y = linear_3_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
tensor<int32, [1]> x_11_axes_0 = const()[name = tensor<string, []>("x_11_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21061696)))];
tensor<fp16, [1, 256, 768]> x_11_cast_fp16 = layer_norm(axes = x_11_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_1_norm_pos_ff_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
tensor<int32, [3]> signal_5_perm_0 = const()[name = tensor<string, []>("signal_5_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 768, 256]> signal_5_cast_fp16 = transpose(perm = signal_5_perm_0, x = x_11_cast_fp16)[name = tensor<string, []>("transpose_61")];
tensor<fp16, [1, 768, 256]> input_43_cast_fp16 = mul(x = signal_5_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
tensor<int32, [6]> input_45_pad_0 = const()[name = tensor<string, []>("input_45_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_45_mode_0 = const()[name = tensor<string, []>("input_45_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_13_to_fp16 = const()[name = tensor<string, []>("const_13_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 768, 258]> input_45_cast_fp16 = pad(constant_val = const_13_to_fp16, mode = input_45_mode_0, pad = input_45_pad_0, x = input_43_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
tensor<string, []> conv_signal_5_pad_type_0 = const()[name = tensor<string, []>("conv_signal_5_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_5_strides_0 = const()[name = tensor<string, []>("conv_signal_5_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_5_pad_0 = const()[name = tensor<string, []>("conv_signal_5_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_5_dilations_0 = const()[name = tensor<string, []>("conv_signal_5_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_5_groups_0 = const()[name = tensor<string, []>("conv_signal_5_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 3]> encoder_layers_1_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_1_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21063296))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28141248))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11605504)))];
tensor<fp16, [1, 3072, 256]> conv_signal_5_cast_fp16 = conv(dilations = conv_signal_5_dilations_0, groups = conv_signal_5_groups_0, pad = conv_signal_5_pad_0, pad_type = conv_signal_5_pad_type_0, strides = conv_signal_5_strides_0, weight = encoder_layers_1_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_45_cast_fp16)[name = tensor<string, []>("conv_signal_5_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_47_cast_fp16 = mul(x = conv_signal_5_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
tensor<string, []> signal_7_mode_0 = const()[name = tensor<string, []>("signal_7_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 256]> signal_7_cast_fp16 = gelu(mode = signal_7_mode_0, x = input_47_cast_fp16)[name = tensor<string, []>("signal_7_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_49_cast_fp16 = mul(x = signal_7_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
tensor<int32, [6]> input_51_pad_0 = const()[name = tensor<string, []>("input_51_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_14_to_fp16 = const()[name = tensor<string, []>("const_14_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 3072, 258]> input_51_cast_fp16 = pad(constant_val = const_14_to_fp16, mode = input_51_mode_0, pad = input_51_pad_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
tensor<string, []> conv_signal_7_pad_type_0 = const()[name = tensor<string, []>("conv_signal_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_7_strides_0 = const()[name = tensor<string, []>("conv_signal_7_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_7_pad_0 = const()[name = tensor<string, []>("conv_signal_7_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_7_dilations_0 = const()[name = tensor<string, []>("conv_signal_7_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_7_groups_0 = const()[name = tensor<string, []>("conv_signal_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 3]> encoder_layers_1_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_1_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28147456))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35225408))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 768, 256]> conv_signal_7_cast_fp16 = conv(dilations = conv_signal_7_dilations_0, groups = conv_signal_7_groups_0, pad = conv_signal_7_pad_0, pad_type = conv_signal_7_pad_type_0, strides = conv_signal_7_strides_0, weight = encoder_layers_1_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = tensor<string, []>("conv_signal_7_cast_fp16")];
tensor<fp16, [1, 768, 256]> var_235_cast_fp16 = mul(x = conv_signal_7_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("op_235_cast_fp16")];
tensor<int32, [3]> input_53_perm_0 = const()[name = tensor<string, []>("input_53_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 256, 768]> input_53_cast_fp16 = transpose(perm = input_53_perm_0, x = var_235_cast_fp16)[name = tensor<string, []>("transpose_60")];
tensor<fp16, [1, 256, 768]> x_13_cast_fp16 = add(x = input_41_cast_fp16, y = input_53_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")];
tensor<fp16, [1, 256, 768]> x_15_cast_fp16 = mul(x = x_13_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_55_cast_fp16 = mul(x = x_15_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
tensor<int32, [1]> query_5_axes_0 = const()[name = tensor<string, []>("query_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_2_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_2_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35227008)))];
tensor<fp16, [1, 256, 768]> query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_2_norm_self_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
tensor<fp16, [2304, 768]> encoder_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_2_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35228608))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36998144))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3789184)))];
tensor<fp16, [1, 256, 2304]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
tensor<int32, [5]> var_257 = const()[name = tensor<string, []>("op_257"), val = tensor<int32, [5]>([1, 256, 3, 12, 64])];
tensor<fp16, [1, 256, 3, 12, 64]> qkv_5_cast_fp16 = reshape(shape = var_257, x = linear_4_cast_fp16)[name = tensor<string, []>("qkv_5_cast_fp16")];
tensor<int32, [3]> var_259_split_sizes_0 = const()[name = tensor<string, []>("op_259_split_sizes_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<int32, []> var_259_axis_0 = const()[name = tensor<string, []>("op_259_axis_0"), val = tensor<int32, []>(2)];
tensor<fp16, [1, 256, 1, 12, 64]> var_259_cast_fp16_0, tensor<fp16, [1, 256, 1, 12, 64]> var_259_cast_fp16_1, tensor<fp16, [1, 256, 1, 12, 64]> var_259_cast_fp16_2 = split(axis = var_259_axis_0, split_sizes = var_259_split_sizes_0, x = qkv_5_cast_fp16)[name = tensor<string, []>("op_259_cast_fp16")];
tensor<int32, [1]> q_15_axes_0 = const()[name = tensor<string, []>("q_15_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> q_15_cast_fp16 = squeeze(axes = q_15_axes_0, x = var_259_cast_fp16_0)[name = tensor<string, []>("q_15_cast_fp16")];
tensor<int32, [1]> k_15_axes_0 = const()[name = tensor<string, []>("k_15_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> k_15_cast_fp16 = squeeze(axes = k_15_axes_0, x = var_259_cast_fp16_1)[name = tensor<string, []>("k_15_cast_fp16")];
tensor<int32, [1]> v_15_axes_0 = const()[name = tensor<string, []>("v_15_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> v_15_cast_fp16 = squeeze(axes = v_15_axes_0, x = var_259_cast_fp16_2)[name = tensor<string, []>("v_15_cast_fp16")];
tensor<int32, [4]> v_17_perm_0 = const()[name = tensor<string, []>("v_17_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<bool, []> var_275_transpose_x_0 = const()[name = tensor<string, []>("op_275_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_275_transpose_y_0 = const()[name = tensor<string, []>("op_275_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_28_perm_0 = const()[name = tensor<string, []>("transpose_28_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_29_perm_0 = const()[name = tensor<string, []>("transpose_29_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 256]> transpose_29 = transpose(perm = transpose_29_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_58")];
tensor<fp16, [1, 12, 256, 64]> transpose_28 = transpose(perm = transpose_28_perm_0, x = q_15_cast_fp16)[name = tensor<string, []>("transpose_59")];
tensor<fp16, [1, 12, 256, 256]> var_275_cast_fp16 = matmul(transpose_x = var_275_transpose_x_0, transpose_y = var_275_transpose_y_0, x = transpose_28, y = transpose_29)[name = tensor<string, []>("op_275_cast_fp16")];
tensor<fp16, []> var_276_to_fp16 = const()[name = tensor<string, []>("op_276_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 256, 256]> attn_score_9_cast_fp16 = mul(x = var_275_cast_fp16, y = var_276_to_fp16)[name = tensor<string, []>("attn_score_9_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_score_11_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_9_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("attn_score_11_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_57_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_11_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_prob_9_cast_fp16 = softmax(axis = var_26, x = input_57_cast_fp16)[name = tensor<string, []>("attn_prob_9_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_59_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_9_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
tensor<bool, []> y_5_transpose_x_0 = const()[name = tensor<string, []>("y_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> y_5_transpose_y_0 = const()[name = tensor<string, []>("y_5_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 256, 64]> v_17_cast_fp16 = transpose(perm = v_17_perm_0, x = v_15_cast_fp16)[name = tensor<string, []>("transpose_57")];
tensor<fp16, [1, 12, 256, 64]> y_5_cast_fp16 = matmul(transpose_x = y_5_transpose_x_0, transpose_y = y_5_transpose_y_0, x = input_59_cast_fp16, y = v_17_cast_fp16)[name = tensor<string, []>("y_5_cast_fp16")];
tensor<int32, [4]> var_289_perm_0 = const()[name = tensor<string, []>("op_289_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_291 = const()[name = tensor<string, []>("op_291"), val = tensor<int32, [3]>([1, 256, -1])];
tensor<fp16, [1, 256, 12, 64]> var_289_cast_fp16 = transpose(perm = var_289_perm_0, x = y_5_cast_fp16)[name = tensor<string, []>("transpose_56")];
tensor<fp16, [1, 256, 768]> input_61_cast_fp16 = reshape(shape = var_291, x = var_289_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
tensor<fp16, [768, 768]> encoder_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_2_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37002816))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37592704))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 256, 768]> linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_65_cast_fp16 = add(x = input_55_cast_fp16, y = linear_5_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
tensor<int32, [1]> x_17_axes_0 = const()[name = tensor<string, []>("x_17_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37594304)))];
tensor<fp16, [1, 256, 768]> x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_2_norm_pos_ff_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
tensor<int32, [3]> signal_9_perm_0 = const()[name = tensor<string, []>("signal_9_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 768, 256]> signal_9_cast_fp16 = transpose(perm = signal_9_perm_0, x = x_17_cast_fp16)[name = tensor<string, []>("transpose_55")];
tensor<fp16, [1, 768, 256]> input_67_cast_fp16 = mul(x = signal_9_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
tensor<int32, [6]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_69_mode_0 = const()[name = tensor<string, []>("input_69_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_19_to_fp16 = const()[name = tensor<string, []>("const_19_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 768, 258]> input_69_cast_fp16 = pad(constant_val = const_19_to_fp16, mode = input_69_mode_0, pad = input_69_pad_0, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
tensor<string, []> conv_signal_9_pad_type_0 = const()[name = tensor<string, []>("conv_signal_9_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_9_strides_0 = const()[name = tensor<string, []>("conv_signal_9_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_9_pad_0 = const()[name = tensor<string, []>("conv_signal_9_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_9_dilations_0 = const()[name = tensor<string, []>("conv_signal_9_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_9_groups_0 = const()[name = tensor<string, []>("conv_signal_9_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 3]> encoder_layers_2_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_2_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37595904))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44673856))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11605504)))];
tensor<fp16, [1, 3072, 256]> conv_signal_9_cast_fp16 = conv(dilations = conv_signal_9_dilations_0, groups = conv_signal_9_groups_0, pad = conv_signal_9_pad_0, pad_type = conv_signal_9_pad_type_0, strides = conv_signal_9_strides_0, weight = encoder_layers_2_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor<string, []>("conv_signal_9_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_71_cast_fp16 = mul(x = conv_signal_9_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
tensor<string, []> signal_11_mode_0 = const()[name = tensor<string, []>("signal_11_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 256]> signal_11_cast_fp16 = gelu(mode = signal_11_mode_0, x = input_71_cast_fp16)[name = tensor<string, []>("signal_11_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_73_cast_fp16 = mul(x = signal_11_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
tensor<int32, [6]> input_75_pad_0 = const()[name = tensor<string, []>("input_75_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_75_mode_0 = const()[name = tensor<string, []>("input_75_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_20_to_fp16 = const()[name = tensor<string, []>("const_20_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 3072, 258]> input_75_cast_fp16 = pad(constant_val = const_20_to_fp16, mode = input_75_mode_0, pad = input_75_pad_0, x = input_73_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
tensor<string, []> conv_signal_11_pad_type_0 = const()[name = tensor<string, []>("conv_signal_11_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_11_strides_0 = const()[name = tensor<string, []>("conv_signal_11_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_11_pad_0 = const()[name = tensor<string, []>("conv_signal_11_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_11_dilations_0 = const()[name = tensor<string, []>("conv_signal_11_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_11_groups_0 = const()[name = tensor<string, []>("conv_signal_11_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 3]> encoder_layers_2_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_2_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44680064))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51758016))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 768, 256]> conv_signal_11_cast_fp16 = conv(dilations = conv_signal_11_dilations_0, groups = conv_signal_11_groups_0, pad = conv_signal_11_pad_0, pad_type = conv_signal_11_pad_type_0, strides = conv_signal_11_strides_0, weight = encoder_layers_2_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = tensor<string, []>("conv_signal_11_cast_fp16")];
tensor<fp16, [1, 768, 256]> var_329_cast_fp16 = mul(x = conv_signal_11_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("op_329_cast_fp16")];
tensor<int32, [3]> input_77_perm_0 = const()[name = tensor<string, []>("input_77_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 256, 768]> input_77_cast_fp16 = transpose(perm = input_77_perm_0, x = var_329_cast_fp16)[name = tensor<string, []>("transpose_54")];
tensor<fp16, [1, 256, 768]> x_19_cast_fp16 = add(x = input_65_cast_fp16, y = input_77_cast_fp16)[name = tensor<string, []>("x_19_cast_fp16")];
tensor<fp16, [1, 256, 768]> x_21_cast_fp16 = mul(x = x_19_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("x_21_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_79_cast_fp16 = mul(x = x_21_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
tensor<int32, [1]> query_7_axes_0 = const()[name = tensor<string, []>("query_7_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_3_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_3_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51759616)))];
tensor<fp16, [1, 256, 768]> query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_3_norm_self_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
tensor<fp16, [2304, 768]> encoder_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_3_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51761216))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53530752))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3789184)))];
tensor<fp16, [1, 256, 2304]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
tensor<int32, [5]> var_351 = const()[name = tensor<string, []>("op_351"), val = tensor<int32, [5]>([1, 256, 3, 12, 64])];
tensor<fp16, [1, 256, 3, 12, 64]> qkv_7_cast_fp16 = reshape(shape = var_351, x = linear_6_cast_fp16)[name = tensor<string, []>("qkv_7_cast_fp16")];
tensor<int32, [3]> var_353_split_sizes_0 = const()[name = tensor<string, []>("op_353_split_sizes_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<int32, []> var_353_axis_0 = const()[name = tensor<string, []>("op_353_axis_0"), val = tensor<int32, []>(2)];
tensor<fp16, [1, 256, 1, 12, 64]> var_353_cast_fp16_0, tensor<fp16, [1, 256, 1, 12, 64]> var_353_cast_fp16_1, tensor<fp16, [1, 256, 1, 12, 64]> var_353_cast_fp16_2 = split(axis = var_353_axis_0, split_sizes = var_353_split_sizes_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("op_353_cast_fp16")];
tensor<int32, [1]> q_21_axes_0 = const()[name = tensor<string, []>("q_21_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> q_21_cast_fp16 = squeeze(axes = q_21_axes_0, x = var_353_cast_fp16_0)[name = tensor<string, []>("q_21_cast_fp16")];
tensor<int32, [1]> k_21_axes_0 = const()[name = tensor<string, []>("k_21_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> k_21_cast_fp16 = squeeze(axes = k_21_axes_0, x = var_353_cast_fp16_1)[name = tensor<string, []>("k_21_cast_fp16")];
tensor<int32, [1]> v_21_axes_0 = const()[name = tensor<string, []>("v_21_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> v_21_cast_fp16 = squeeze(axes = v_21_axes_0, x = var_353_cast_fp16_2)[name = tensor<string, []>("v_21_cast_fp16")];
tensor<int32, [4]> v_23_perm_0 = const()[name = tensor<string, []>("v_23_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<bool, []> var_369_transpose_x_0 = const()[name = tensor<string, []>("op_369_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_369_transpose_y_0 = const()[name = tensor<string, []>("op_369_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_30_perm_0 = const()[name = tensor<string, []>("transpose_30_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_31_perm_0 = const()[name = tensor<string, []>("transpose_31_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 256]> transpose_31 = transpose(perm = transpose_31_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_52")];
tensor<fp16, [1, 12, 256, 64]> transpose_30 = transpose(perm = transpose_30_perm_0, x = q_21_cast_fp16)[name = tensor<string, []>("transpose_53")];
tensor<fp16, [1, 12, 256, 256]> var_369_cast_fp16 = matmul(transpose_x = var_369_transpose_x_0, transpose_y = var_369_transpose_y_0, x = transpose_30, y = transpose_31)[name = tensor<string, []>("op_369_cast_fp16")];
tensor<fp16, []> var_370_to_fp16 = const()[name = tensor<string, []>("op_370_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 256, 256]> attn_score_13_cast_fp16 = mul(x = var_369_cast_fp16, y = var_370_to_fp16)[name = tensor<string, []>("attn_score_13_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_score_15_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_13_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("attn_score_15_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_81_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_15_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_prob_13_cast_fp16 = softmax(axis = var_26, x = input_81_cast_fp16)[name = tensor<string, []>("attn_prob_13_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_83_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_13_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
tensor<bool, []> y_7_transpose_x_0 = const()[name = tensor<string, []>("y_7_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> y_7_transpose_y_0 = const()[name = tensor<string, []>("y_7_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 256, 64]> v_23_cast_fp16 = transpose(perm = v_23_perm_0, x = v_21_cast_fp16)[name = tensor<string, []>("transpose_51")];
tensor<fp16, [1, 12, 256, 64]> y_7_cast_fp16 = matmul(transpose_x = y_7_transpose_x_0, transpose_y = y_7_transpose_y_0, x = input_83_cast_fp16, y = v_23_cast_fp16)[name = tensor<string, []>("y_7_cast_fp16")];
tensor<int32, [4]> var_383_perm_0 = const()[name = tensor<string, []>("op_383_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_385 = const()[name = tensor<string, []>("op_385"), val = tensor<int32, [3]>([1, 256, -1])];
tensor<fp16, [1, 256, 12, 64]> var_383_cast_fp16 = transpose(perm = var_383_perm_0, x = y_7_cast_fp16)[name = tensor<string, []>("transpose_50")];
tensor<fp16, [1, 256, 768]> input_85_cast_fp16 = reshape(shape = var_385, x = var_383_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
tensor<fp16, [768, 768]> encoder_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_3_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53535424))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54125312))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 256, 768]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_85_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_89_cast_fp16 = add(x = input_79_cast_fp16, y = linear_7_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
tensor<int32, [1]> x_23_axes_0 = const()[name = tensor<string, []>("x_23_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54126912)))];
tensor<fp16, [1, 256, 768]> x_23_cast_fp16 = layer_norm(axes = x_23_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_3_norm_pos_ff_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")];
tensor<int32, [3]> signal_13_perm_0 = const()[name = tensor<string, []>("signal_13_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 768, 256]> signal_13_cast_fp16 = transpose(perm = signal_13_perm_0, x = x_23_cast_fp16)[name = tensor<string, []>("transpose_49")];
tensor<fp16, [1, 768, 256]> input_91_cast_fp16 = mul(x = signal_13_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
tensor<int32, [6]> input_93_pad_0 = const()[name = tensor<string, []>("input_93_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_93_mode_0 = const()[name = tensor<string, []>("input_93_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_25_to_fp16 = const()[name = tensor<string, []>("const_25_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 768, 258]> input_93_cast_fp16 = pad(constant_val = const_25_to_fp16, mode = input_93_mode_0, pad = input_93_pad_0, x = input_91_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
tensor<string, []> conv_signal_13_pad_type_0 = const()[name = tensor<string, []>("conv_signal_13_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_13_strides_0 = const()[name = tensor<string, []>("conv_signal_13_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_13_pad_0 = const()[name = tensor<string, []>("conv_signal_13_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_13_dilations_0 = const()[name = tensor<string, []>("conv_signal_13_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_13_groups_0 = const()[name = tensor<string, []>("conv_signal_13_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 3]> encoder_layers_3_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_3_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54128512))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61206464))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11605504)))];
tensor<fp16, [1, 3072, 256]> conv_signal_13_cast_fp16 = conv(dilations = conv_signal_13_dilations_0, groups = conv_signal_13_groups_0, pad = conv_signal_13_pad_0, pad_type = conv_signal_13_pad_type_0, strides = conv_signal_13_strides_0, weight = encoder_layers_3_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_93_cast_fp16)[name = tensor<string, []>("conv_signal_13_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_95_cast_fp16 = mul(x = conv_signal_13_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
tensor<string, []> signal_15_mode_0 = const()[name = tensor<string, []>("signal_15_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 256]> signal_15_cast_fp16 = gelu(mode = signal_15_mode_0, x = input_95_cast_fp16)[name = tensor<string, []>("signal_15_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_97_cast_fp16 = mul(x = signal_15_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
tensor<int32, [6]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_99_mode_0 = const()[name = tensor<string, []>("input_99_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_26_to_fp16 = const()[name = tensor<string, []>("const_26_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 3072, 258]> input_99_cast_fp16 = pad(constant_val = const_26_to_fp16, mode = input_99_mode_0, pad = input_99_pad_0, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
tensor<string, []> conv_signal_15_pad_type_0 = const()[name = tensor<string, []>("conv_signal_15_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_15_strides_0 = const()[name = tensor<string, []>("conv_signal_15_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_15_pad_0 = const()[name = tensor<string, []>("conv_signal_15_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_15_dilations_0 = const()[name = tensor<string, []>("conv_signal_15_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_15_groups_0 = const()[name = tensor<string, []>("conv_signal_15_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 3]> encoder_layers_3_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_3_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61212672))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68290624))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 768, 256]> conv_signal_15_cast_fp16 = conv(dilations = conv_signal_15_dilations_0, groups = conv_signal_15_groups_0, pad = conv_signal_15_pad_0, pad_type = conv_signal_15_pad_type_0, strides = conv_signal_15_strides_0, weight = encoder_layers_3_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = tensor<string, []>("conv_signal_15_cast_fp16")];
tensor<fp16, [1, 768, 256]> var_423_cast_fp16 = mul(x = conv_signal_15_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("op_423_cast_fp16")];
tensor<int32, [3]> input_101_perm_0 = const()[name = tensor<string, []>("input_101_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 256, 768]> input_101_cast_fp16 = transpose(perm = input_101_perm_0, x = var_423_cast_fp16)[name = tensor<string, []>("transpose_48")];
tensor<fp16, [1, 256, 768]> x_25_cast_fp16 = add(x = input_89_cast_fp16, y = input_101_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
tensor<fp16, [1, 256, 768]> x_27_cast_fp16 = mul(x = x_25_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("x_27_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_103_cast_fp16 = mul(x = x_27_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
tensor<int32, [1]> query_9_axes_0 = const()[name = tensor<string, []>("query_9_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_4_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_4_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68292224)))];
tensor<fp16, [1, 256, 768]> query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_4_norm_self_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
tensor<fp16, [2304, 768]> encoder_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_4_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68293824))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70063360))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3789184)))];
tensor<fp16, [1, 256, 2304]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
tensor<int32, [5]> var_445 = const()[name = tensor<string, []>("op_445"), val = tensor<int32, [5]>([1, 256, 3, 12, 64])];
tensor<fp16, [1, 256, 3, 12, 64]> qkv_9_cast_fp16 = reshape(shape = var_445, x = linear_8_cast_fp16)[name = tensor<string, []>("qkv_9_cast_fp16")];
tensor<int32, [3]> var_447_split_sizes_0 = const()[name = tensor<string, []>("op_447_split_sizes_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<int32, []> var_447_axis_0 = const()[name = tensor<string, []>("op_447_axis_0"), val = tensor<int32, []>(2)];
tensor<fp16, [1, 256, 1, 12, 64]> var_447_cast_fp16_0, tensor<fp16, [1, 256, 1, 12, 64]> var_447_cast_fp16_1, tensor<fp16, [1, 256, 1, 12, 64]> var_447_cast_fp16_2 = split(axis = var_447_axis_0, split_sizes = var_447_split_sizes_0, x = qkv_9_cast_fp16)[name = tensor<string, []>("op_447_cast_fp16")];
tensor<int32, [1]> q_27_axes_0 = const()[name = tensor<string, []>("q_27_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> q_27_cast_fp16 = squeeze(axes = q_27_axes_0, x = var_447_cast_fp16_0)[name = tensor<string, []>("q_27_cast_fp16")];
tensor<int32, [1]> k_27_axes_0 = const()[name = tensor<string, []>("k_27_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> k_27_cast_fp16 = squeeze(axes = k_27_axes_0, x = var_447_cast_fp16_1)[name = tensor<string, []>("k_27_cast_fp16")];
tensor<int32, [1]> v_27_axes_0 = const()[name = tensor<string, []>("v_27_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> v_27_cast_fp16 = squeeze(axes = v_27_axes_0, x = var_447_cast_fp16_2)[name = tensor<string, []>("v_27_cast_fp16")];
tensor<int32, [4]> v_29_perm_0 = const()[name = tensor<string, []>("v_29_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<bool, []> var_463_transpose_x_0 = const()[name = tensor<string, []>("op_463_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_463_transpose_y_0 = const()[name = tensor<string, []>("op_463_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_32_perm_0 = const()[name = tensor<string, []>("transpose_32_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_33_perm_0 = const()[name = tensor<string, []>("transpose_33_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 256]> transpose_33 = transpose(perm = transpose_33_perm_0, x = k_27_cast_fp16)[name = tensor<string, []>("transpose_46")];
tensor<fp16, [1, 12, 256, 64]> transpose_32 = transpose(perm = transpose_32_perm_0, x = q_27_cast_fp16)[name = tensor<string, []>("transpose_47")];
tensor<fp16, [1, 12, 256, 256]> var_463_cast_fp16 = matmul(transpose_x = var_463_transpose_x_0, transpose_y = var_463_transpose_y_0, x = transpose_32, y = transpose_33)[name = tensor<string, []>("op_463_cast_fp16")];
tensor<fp16, []> var_464_to_fp16 = const()[name = tensor<string, []>("op_464_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 256, 256]> attn_score_17_cast_fp16 = mul(x = var_463_cast_fp16, y = var_464_to_fp16)[name = tensor<string, []>("attn_score_17_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_score_19_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_17_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("attn_score_19_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_105_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_19_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_prob_17_cast_fp16 = softmax(axis = var_26, x = input_105_cast_fp16)[name = tensor<string, []>("attn_prob_17_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_107_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_17_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
tensor<bool, []> y_9_transpose_x_0 = const()[name = tensor<string, []>("y_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> y_9_transpose_y_0 = const()[name = tensor<string, []>("y_9_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 256, 64]> v_29_cast_fp16 = transpose(perm = v_29_perm_0, x = v_27_cast_fp16)[name = tensor<string, []>("transpose_45")];
tensor<fp16, [1, 12, 256, 64]> y_9_cast_fp16 = matmul(transpose_x = y_9_transpose_x_0, transpose_y = y_9_transpose_y_0, x = input_107_cast_fp16, y = v_29_cast_fp16)[name = tensor<string, []>("y_9_cast_fp16")];
tensor<int32, [4]> var_477_perm_0 = const()[name = tensor<string, []>("op_477_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_479 = const()[name = tensor<string, []>("op_479"), val = tensor<int32, [3]>([1, 256, -1])];
tensor<fp16, [1, 256, 12, 64]> var_477_cast_fp16 = transpose(perm = var_477_perm_0, x = y_9_cast_fp16)[name = tensor<string, []>("transpose_44")];
tensor<fp16, [1, 256, 768]> input_109_cast_fp16 = reshape(shape = var_479, x = var_477_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
tensor<fp16, [768, 768]> encoder_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_4_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70068032))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70657920))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 256, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_109_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_113_cast_fp16 = add(x = input_103_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
tensor<int32, [1]> x_29_axes_0 = const()[name = tensor<string, []>("x_29_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70659520)))];
tensor<fp16, [1, 256, 768]> x_29_cast_fp16 = layer_norm(axes = x_29_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_4_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("x_29_cast_fp16")];
tensor<int32, [3]> signal_17_perm_0 = const()[name = tensor<string, []>("signal_17_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 768, 256]> signal_17_cast_fp16 = transpose(perm = signal_17_perm_0, x = x_29_cast_fp16)[name = tensor<string, []>("transpose_43")];
tensor<fp16, [1, 768, 256]> input_115_cast_fp16 = mul(x = signal_17_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
tensor<int32, [6]> input_117_pad_0 = const()[name = tensor<string, []>("input_117_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_117_mode_0 = const()[name = tensor<string, []>("input_117_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_31_to_fp16 = const()[name = tensor<string, []>("const_31_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 768, 258]> input_117_cast_fp16 = pad(constant_val = const_31_to_fp16, mode = input_117_mode_0, pad = input_117_pad_0, x = input_115_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
tensor<string, []> conv_signal_17_pad_type_0 = const()[name = tensor<string, []>("conv_signal_17_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_17_strides_0 = const()[name = tensor<string, []>("conv_signal_17_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_17_pad_0 = const()[name = tensor<string, []>("conv_signal_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_17_dilations_0 = const()[name = tensor<string, []>("conv_signal_17_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_17_groups_0 = const()[name = tensor<string, []>("conv_signal_17_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 3]> encoder_layers_4_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_4_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70661120))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77739072))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11605504)))];
tensor<fp16, [1, 3072, 256]> conv_signal_17_cast_fp16 = conv(dilations = conv_signal_17_dilations_0, groups = conv_signal_17_groups_0, pad = conv_signal_17_pad_0, pad_type = conv_signal_17_pad_type_0, strides = conv_signal_17_strides_0, weight = encoder_layers_4_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = tensor<string, []>("conv_signal_17_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_119_cast_fp16 = mul(x = conv_signal_17_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
tensor<string, []> signal_19_mode_0 = const()[name = tensor<string, []>("signal_19_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 256]> signal_19_cast_fp16 = gelu(mode = signal_19_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("signal_19_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_121_cast_fp16 = mul(x = signal_19_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
tensor<int32, [6]> input_123_pad_0 = const()[name = tensor<string, []>("input_123_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_123_mode_0 = const()[name = tensor<string, []>("input_123_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_32_to_fp16 = const()[name = tensor<string, []>("const_32_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 3072, 258]> input_123_cast_fp16 = pad(constant_val = const_32_to_fp16, mode = input_123_mode_0, pad = input_123_pad_0, x = input_121_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
tensor<string, []> conv_signal_19_pad_type_0 = const()[name = tensor<string, []>("conv_signal_19_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_19_strides_0 = const()[name = tensor<string, []>("conv_signal_19_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_19_pad_0 = const()[name = tensor<string, []>("conv_signal_19_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_19_dilations_0 = const()[name = tensor<string, []>("conv_signal_19_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_19_groups_0 = const()[name = tensor<string, []>("conv_signal_19_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 3]> encoder_layers_4_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_4_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77745280))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84823232))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 768, 256]> conv_signal_19_cast_fp16 = conv(dilations = conv_signal_19_dilations_0, groups = conv_signal_19_groups_0, pad = conv_signal_19_pad_0, pad_type = conv_signal_19_pad_type_0, strides = conv_signal_19_strides_0, weight = encoder_layers_4_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor<string, []>("conv_signal_19_cast_fp16")];
tensor<fp16, [1, 768, 256]> var_517_cast_fp16 = mul(x = conv_signal_19_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("op_517_cast_fp16")];
tensor<int32, [3]> input_125_perm_0 = const()[name = tensor<string, []>("input_125_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 256, 768]> input_125_cast_fp16 = transpose(perm = input_125_perm_0, x = var_517_cast_fp16)[name = tensor<string, []>("transpose_42")];
tensor<fp16, [1, 256, 768]> x_31_cast_fp16 = add(x = input_113_cast_fp16, y = input_125_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
tensor<fp16, [1, 256, 768]> x_33_cast_fp16 = mul(x = x_31_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("x_33_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_127_cast_fp16 = mul(x = x_33_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
tensor<int32, [1]> query_axes_0 = const()[name = tensor<string, []>("query_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_5_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_5_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84824832)))];
tensor<fp16, [1, 256, 768]> query_cast_fp16 = layer_norm(axes = query_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_5_norm_self_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
tensor<fp16, [2304, 768]> encoder_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_5_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84826432))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86595968))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3789184)))];
tensor<fp16, [1, 256, 2304]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
tensor<int32, [5]> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, [5]>([1, 256, 3, 12, 64])];
tensor<fp16, [1, 256, 3, 12, 64]> qkv_cast_fp16 = reshape(shape = var_539, x = linear_10_cast_fp16)[name = tensor<string, []>("qkv_cast_fp16")];
tensor<int32, [3]> var_541_split_sizes_0 = const()[name = tensor<string, []>("op_541_split_sizes_0"), val = tensor<int32, [3]>([1, 1, 1])];
tensor<int32, []> var_541_axis_0 = const()[name = tensor<string, []>("op_541_axis_0"), val = tensor<int32, []>(2)];
tensor<fp16, [1, 256, 1, 12, 64]> var_541_cast_fp16_0, tensor<fp16, [1, 256, 1, 12, 64]> var_541_cast_fp16_1, tensor<fp16, [1, 256, 1, 12, 64]> var_541_cast_fp16_2 = split(axis = var_541_axis_0, split_sizes = var_541_split_sizes_0, x = qkv_cast_fp16)[name = tensor<string, []>("op_541_cast_fp16")];
tensor<int32, [1]> q_33_axes_0 = const()[name = tensor<string, []>("q_33_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> q_33_cast_fp16 = squeeze(axes = q_33_axes_0, x = var_541_cast_fp16_0)[name = tensor<string, []>("q_33_cast_fp16")];
tensor<int32, [1]> k_33_axes_0 = const()[name = tensor<string, []>("k_33_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> k_33_cast_fp16 = squeeze(axes = k_33_axes_0, x = var_541_cast_fp16_1)[name = tensor<string, []>("k_33_cast_fp16")];
tensor<int32, [1]> v_33_axes_0 = const()[name = tensor<string, []>("v_33_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 256, 12, 64]> v_33_cast_fp16 = squeeze(axes = v_33_axes_0, x = var_541_cast_fp16_2)[name = tensor<string, []>("v_33_cast_fp16")];
tensor<int32, [4]> v_perm_0 = const()[name = tensor<string, []>("v_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<bool, []> var_557_transpose_x_0 = const()[name = tensor<string, []>("op_557_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_557_transpose_y_0 = const()[name = tensor<string, []>("op_557_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_34_perm_0 = const()[name = tensor<string, []>("transpose_34_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_35_perm_0 = const()[name = tensor<string, []>("transpose_35_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 256]> transpose_35 = transpose(perm = transpose_35_perm_0, x = k_33_cast_fp16)[name = tensor<string, []>("transpose_40")];
tensor<fp16, [1, 12, 256, 64]> transpose_34 = transpose(perm = transpose_34_perm_0, x = q_33_cast_fp16)[name = tensor<string, []>("transpose_41")];
tensor<fp16, [1, 12, 256, 256]> var_557_cast_fp16 = matmul(transpose_x = var_557_transpose_x_0, transpose_y = var_557_transpose_y_0, x = transpose_34, y = transpose_35)[name = tensor<string, []>("op_557_cast_fp16")];
tensor<fp16, []> var_558_to_fp16 = const()[name = tensor<string, []>("op_558_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 256, 256]> attn_score_21_cast_fp16 = mul(x = var_557_cast_fp16, y = var_558_to_fp16)[name = tensor<string, []>("attn_score_21_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_score_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_21_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("attn_score_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_129_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> attn_prob_21_cast_fp16 = softmax(axis = var_26, x = input_129_cast_fp16)[name = tensor<string, []>("attn_prob_21_cast_fp16")];
tensor<fp16, [1, 12, 256, 256]> input_131_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_21_cast_fp16, cond = var_90_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
tensor<bool, []> y_transpose_x_0 = const()[name = tensor<string, []>("y_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> y_transpose_y_0 = const()[name = tensor<string, []>("y_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 256, 64]> v_cast_fp16 = transpose(perm = v_perm_0, x = v_33_cast_fp16)[name = tensor<string, []>("transpose_39")];
tensor<fp16, [1, 12, 256, 64]> y_cast_fp16 = matmul(transpose_x = y_transpose_x_0, transpose_y = y_transpose_y_0, x = input_131_cast_fp16, y = v_cast_fp16)[name = tensor<string, []>("y_cast_fp16")];
tensor<int32, [4]> var_571_perm_0 = const()[name = tensor<string, []>("op_571_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_573 = const()[name = tensor<string, []>("op_573"), val = tensor<int32, [3]>([1, 256, -1])];
tensor<fp16, [1, 256, 12, 64]> var_571_cast_fp16 = transpose(perm = var_571_perm_0, x = y_cast_fp16)[name = tensor<string, []>("transpose_38")];
tensor<fp16, [1, 256, 768]> input_133_cast_fp16 = reshape(shape = var_573, x = var_571_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
tensor<fp16, [768, 768]> encoder_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_5_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86600640))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87190528))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 256, 768]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_137_cast_fp16 = add(x = input_127_cast_fp16, y = linear_11_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
tensor<int32, [1]> x_35_axes_0 = const()[name = tensor<string, []>("x_35_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("encoder_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87192128)))];
tensor<fp16, [1, 256, 768]> x_35_cast_fp16 = layer_norm(axes = x_35_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_5_norm_pos_ff_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("x_35_cast_fp16")];
tensor<int32, [3]> signal_21_perm_0 = const()[name = tensor<string, []>("signal_21_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 768, 256]> signal_21_cast_fp16 = transpose(perm = signal_21_perm_0, x = x_35_cast_fp16)[name = tensor<string, []>("transpose_37")];
tensor<fp16, [1, 768, 256]> input_139_cast_fp16 = mul(x = signal_21_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
tensor<int32, [6]> input_141_pad_0 = const()[name = tensor<string, []>("input_141_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_37_to_fp16 = const()[name = tensor<string, []>("const_37_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 768, 258]> input_141_cast_fp16 = pad(constant_val = const_37_to_fp16, mode = input_141_mode_0, pad = input_141_pad_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
tensor<string, []> conv_signal_21_pad_type_0 = const()[name = tensor<string, []>("conv_signal_21_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_21_strides_0 = const()[name = tensor<string, []>("conv_signal_21_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_21_pad_0 = const()[name = tensor<string, []>("conv_signal_21_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_21_dilations_0 = const()[name = tensor<string, []>("conv_signal_21_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_21_groups_0 = const()[name = tensor<string, []>("conv_signal_21_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 3]> encoder_layers_5_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_5_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87193728))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94271680))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11605504)))];
tensor<fp16, [1, 3072, 256]> conv_signal_21_cast_fp16 = conv(dilations = conv_signal_21_dilations_0, groups = conv_signal_21_groups_0, pad = conv_signal_21_pad_0, pad_type = conv_signal_21_pad_type_0, strides = conv_signal_21_strides_0, weight = encoder_layers_5_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_141_cast_fp16)[name = tensor<string, []>("conv_signal_21_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_143_cast_fp16 = mul(x = conv_signal_21_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
tensor<string, []> signal_mode_0 = const()[name = tensor<string, []>("signal_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 256]> signal_cast_fp16 = gelu(mode = signal_mode_0, x = input_143_cast_fp16)[name = tensor<string, []>("signal_cast_fp16")];
tensor<fp16, [1, 3072, 256]> input_145_cast_fp16 = mul(x = signal_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
tensor<int32, [6]> input_147_pad_0 = const()[name = tensor<string, []>("input_147_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 2, 0])];
tensor<string, []> input_147_mode_0 = const()[name = tensor<string, []>("input_147_mode_0"), val = tensor<string, []>("constant")];
tensor<fp16, []> const_38_to_fp16 = const()[name = tensor<string, []>("const_38_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
tensor<fp16, [1, 3072, 258]> input_147_cast_fp16 = pad(constant_val = const_38_to_fp16, mode = input_147_mode_0, pad = input_147_pad_0, x = input_145_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
tensor<string, []> conv_signal_pad_type_0 = const()[name = tensor<string, []>("conv_signal_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> conv_signal_strides_0 = const()[name = tensor<string, []>("conv_signal_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> conv_signal_pad_0 = const()[name = tensor<string, []>("conv_signal_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> conv_signal_dilations_0 = const()[name = tensor<string, []>("conv_signal_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> conv_signal_groups_0 = const()[name = tensor<string, []>("conv_signal_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 3]> encoder_layers_5_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("encoder_layers_5_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94277888))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101355840))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4521920)))];
tensor<fp16, [1, 768, 256]> conv_signal_cast_fp16 = conv(dilations = conv_signal_dilations_0, groups = conv_signal_groups_0, pad = conv_signal_pad_0, pad_type = conv_signal_pad_type_0, strides = conv_signal_strides_0, weight = encoder_layers_5_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor<string, []>("conv_signal_cast_fp16")];
tensor<fp16, [1, 768, 256]> var_611_cast_fp16 = mul(x = conv_signal_cast_fp16, y = var_77_cast_fp16)[name = tensor<string, []>("op_611_cast_fp16")];
tensor<int32, [3]> input_149_perm_0 = const()[name = tensor<string, []>("input_149_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 256, 768]> input_149_cast_fp16 = transpose(perm = input_149_perm_0, x = var_611_cast_fp16)[name = tensor<string, []>("transpose_36")];
tensor<fp16, [1, 256, 768]> x_cast_fp16 = add(x = input_137_cast_fp16, y = input_149_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
tensor<fp16, [1, 256, 768]> input_cast_fp16 = mul(x = x_cast_fp16, y = var_57_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
tensor<int32, [1]> var_619_axes_0 = const()[name = tensor<string, []>("op_619_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> encoder_norm_out_weight_to_fp16 = const()[name = tensor<string, []>("encoder_norm_out_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101357440)))];
tensor<fp16, [1, 256, 768]> var_619_cast_fp16 = layer_norm(axes = var_619_axes_0, epsilon = var_15_to_fp16, gamma = encoder_norm_out_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_619_cast_fp16")];
tensor<string, []> var_619_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_619_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
tensor<fp32, [1, 256, 768]> encoder_output = cast(dtype = var_619_cast_fp16_to_fp32_dtype_0, x = var_619_cast_fp16)[name = tensor<string, []>("cast_1")];
} -> (encoder_output);
}