Akito
Replace with CoreML files
3ed074f
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
{
func main<ios16>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 448]> decoder_key_padding_mask, tensor<fp16, [1, 768, 1, 1500]> encoder_output_embeds, tensor<int32, [1]> input_ids, tensor<fp16, [1, 2304, 1, 448]> key_cache, tensor<fp16, [1, 448]> kv_cache_update_mask, tensor<fp16, [1, 2304, 1, 448]> value_cache) {
tensor<int32, []> var_22_axis_0 = const()[name = tensor<string, []>("op_22_axis_0"), val = tensor<int32, []>(0)];
tensor<int32, []> var_22_batch_dims_0 = const()[name = tensor<string, []>("op_22_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<fp16, [51865, 768]> embed_tokens_weight_to_fp16 = const()[name = tensor<string, []>("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51865, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
tensor<fp16, [1, 768]> var_22_cast_fp16 = gather(axis = var_22_axis_0, batch_dims = var_22_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor<string, []>("op_22_cast_fp16")];
tensor<int32, []> var_26_axis_0 = const()[name = tensor<string, []>("op_26_axis_0"), val = tensor<int32, []>(0)];
tensor<int32, []> var_26_batch_dims_0 = const()[name = tensor<string, []>("op_26_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<fp16, [448, 768]> embed_positions_weight_to_fp16 = const()[name = tensor<string, []>("embed_positions_weight_to_fp16"), val = tensor<fp16, [448, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79664768)))];
tensor<fp16, [1, 768]> var_26_cast_fp16 = gather(axis = var_26_axis_0, batch_dims = var_26_batch_dims_0, indices = cache_length, x = embed_positions_weight_to_fp16)[name = tensor<string, []>("op_26_cast_fp16")];
tensor<fp16, [1, 768]> hidden_states_1_cast_fp16 = add(x = var_22_cast_fp16, y = var_26_cast_fp16)[name = tensor<string, []>("hidden_states_1_cast_fp16")];
tensor<int32, [1]> var_40_axes_0 = const()[name = tensor<string, []>("op_40_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 768, 1]> var_40_cast_fp16 = expand_dims(axes = var_40_axes_0, x = hidden_states_1_cast_fp16)[name = tensor<string, []>("op_40_cast_fp16")];
tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
tensor<fp16, [1, 768, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_40_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
tensor<int32, [3]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [3]>([768, 768, 768])];
tensor<int32, []> var_45_axis_0 = const()[name = tensor<string, []>("op_45_axis_0"), val = tensor<int32, []>(1)];
tensor<fp16, [1, 768, 1, 448]> var_45_cast_fp16_0, tensor<fp16, [1, 768, 1, 448]> var_45_cast_fp16_1, tensor<fp16, [1, 768, 1, 448]> var_45_cast_fp16_2 = split(axis = var_45_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor<string, []>("op_45_cast_fp16")];
tensor<int32, [3]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [3]>([768, 768, 768])];
tensor<int32, []> var_51_axis_0 = const()[name = tensor<string, []>("op_51_axis_0"), val = tensor<int32, []>(1)];
tensor<fp16, [1, 768, 1, 448]> var_51_cast_fp16_0, tensor<fp16, [1, 768, 1, 448]> var_51_cast_fp16_1, tensor<fp16, [1, 768, 1, 448]> var_51_cast_fp16_2 = split(axis = var_51_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor<string, []>("op_51_cast_fp16")];
tensor<int32, []> var_60 = const()[name = tensor<string, []>("op_60"), val = tensor<int32, []>(3)];
tensor<int32, [1]> out_1_axes_0 = const()[name = tensor<string, []>("out_1_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_85_to_fp16 = const()[name = tensor<string, []>("op_85_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_85_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
tensor<fp16, [768]> obj_1_mean_0_to_fp16 = const()[name = tensor<string, []>("obj_1_mean_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80352960)))];
tensor<fp16, [768]> obj_1_variance_0_to_fp16 = const()[name = tensor<string, []>("obj_1_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80354560)))];
tensor<fp16, [768]> obj_1_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80356160)))];
tensor<fp16, [768]> obj_1_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_1_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80357760)))];
tensor<fp16, []> obj_1_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_1_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor<string, []>("obj_1_cast_fp16")];
tensor<string, []> query_1_pad_type_0 = const()[name = tensor<string, []>("query_1_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> query_1_strides_0 = const()[name = tensor<string, []>("query_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_1_pad_0 = const()[name = tensor<string, []>("query_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_1_dilations_0 = const()[name = tensor<string, []>("query_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> query_1_groups_0 = const()[name = tensor<string, []>("query_1_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80359360)))];
tensor<fp16, [768]> layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81539072)))];
tensor<fp16, [1, 768, 1, 1]> query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("query_1_cast_fp16")];
tensor<string, []> current_key_1_pad_type_0 = const()[name = tensor<string, []>("current_key_1_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> current_key_1_strides_0 = const()[name = tensor<string, []>("current_key_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> current_key_1_pad_0 = const()[name = tensor<string, []>("current_key_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> current_key_1_dilations_0 = const()[name = tensor<string, []>("current_key_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> current_key_1_groups_0 = const()[name = tensor<string, []>("current_key_1_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81540672)))];
tensor<fp16, [1, 768, 1, 1]> current_key_1_cast_fp16 = conv(dilations = current_key_1_dilations_0, groups = current_key_1_groups_0, pad = current_key_1_pad_0, pad_type = current_key_1_pad_type_0, strides = current_key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_key_1_cast_fp16")];
tensor<string, []> current_value_1_pad_type_0 = const()[name = tensor<string, []>("current_value_1_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> current_value_1_strides_0 = const()[name = tensor<string, []>("current_value_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> current_value_1_pad_0 = const()[name = tensor<string, []>("current_value_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> current_value_1_dilations_0 = const()[name = tensor<string, []>("current_value_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> current_value_1_groups_0 = const()[name = tensor<string, []>("current_value_1_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82720384)))];
tensor<fp16, [768]> layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83900096)))];
tensor<fp16, [1, 768, 1, 1]> current_value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = current_value_1_dilations_0, groups = current_value_1_groups_0, pad = current_value_1_pad_0, pad_type = current_value_1_pad_type_0, strides = current_value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor<string, []>("current_value_1_cast_fp16")];
tensor<int32, [1]> var_120_axes_0 = const()[name = tensor<string, []>("op_120_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [1, 1, 448]> var_120_cast_fp16 = expand_dims(axes = var_120_axes_0, x = kv_cache_update_mask)[name = tensor<string, []>("op_120_cast_fp16")];
tensor<int32, [1]> var_121_axes_0 = const()[name = tensor<string, []>("op_121_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 1, 1, 448]> var_121_cast_fp16 = expand_dims(axes = var_121_axes_0, x = var_120_cast_fp16)[name = tensor<string, []>("op_121_cast_fp16")];
tensor<fp16, []> var_61_to_fp16 = const()[name = tensor<string, []>("op_61_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 1, 1, 448]> var_123_cast_fp16 = sub(x = var_61_to_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_123_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_124_cast_fp16 = mul(x = var_45_cast_fp16_0, y = var_123_cast_fp16)[name = tensor<string, []>("op_124_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_125_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_125_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> key_1_cast_fp16 = add(x = var_124_cast_fp16, y = var_125_cast_fp16)[name = tensor<string, []>("key_1_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_128_cast_fp16 = mul(x = var_51_cast_fp16_0, y = var_123_cast_fp16)[name = tensor<string, []>("op_128_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_129_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_129_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> value_1_cast_fp16 = add(x = var_128_cast_fp16, y = var_129_cast_fp16)[name = tensor<string, []>("value_1_cast_fp16")];
tensor<int32, [4]> var_133 = const()[name = tensor<string, []>("op_133"), val = tensor<int32, [4]>([1, 12, 64, 1])];
tensor<fp16, [1, 12, 64, 1]> mh_q_1_cast_fp16 = reshape(shape = var_133, x = query_1_cast_fp16)[name = tensor<string, []>("mh_q_1_cast_fp16")];
tensor<fp16, []> var_135_to_fp16 = const()[name = tensor<string, []>("op_135_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 64, 1]> var_136_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_135_to_fp16)[name = tensor<string, []>("op_136_cast_fp16")];
tensor<int32, [4]> var_139 = const()[name = tensor<string, []>("op_139"), val = tensor<int32, [4]>([1, 12, 64, 448])];
tensor<fp16, [1, 12, 64, 448]> var_140_cast_fp16 = reshape(shape = var_139, x = key_1_cast_fp16)[name = tensor<string, []>("op_140_cast_fp16")];
tensor<bool, []> mh_w_1_transpose_x_0 = const()[name = tensor<string, []>("mh_w_1_transpose_x_0"), val = tensor<bool, []>(true)];
tensor<bool, []> mh_w_1_transpose_y_0 = const()[name = tensor<string, []>("mh_w_1_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 1, 448]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_136_cast_fp16, y = var_140_cast_fp16)[name = tensor<string, []>("mh_w_1_cast_fp16")];
tensor<int32, [1]> var_144_axes_0 = const()[name = tensor<string, []>("op_144_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [1, 1, 448]> var_144_cast_fp16 = expand_dims(axes = var_144_axes_0, x = decoder_key_padding_mask)[name = tensor<string, []>("op_144_cast_fp16")];
tensor<int32, [1]> var_145_axes_0 = const()[name = tensor<string, []>("op_145_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 1, 1, 448]> var_145_cast_fp16 = expand_dims(axes = var_145_axes_0, x = var_144_cast_fp16)[name = tensor<string, []>("op_145_cast_fp16")];
tensor<fp16, [1, 12, 1, 448]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_145_cast_fp16)[name = tensor<string, []>("mh_w_3_cast_fp16")];
tensor<fp16, [1, 12, 1, 448]> var_148_cast_fp16 = softmax(axis = var_60, x = mh_w_3_cast_fp16)[name = tensor<string, []>("op_148_cast_fp16")];
tensor<int32, [4]> var_149 = const()[name = tensor<string, []>("op_149"), val = tensor<int32, [4]>([1, 12, 64, 448])];
tensor<fp16, [1, 12, 64, 448]> var_150_cast_fp16 = reshape(shape = var_149, x = value_1_cast_fp16)[name = tensor<string, []>("op_150_cast_fp16")];
tensor<bool, []> attn_1_transpose_x_0 = const()[name = tensor<string, []>("attn_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_1_transpose_y_0 = const()[name = tensor<string, []>("attn_1_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<fp16, [1, 12, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_150_cast_fp16, y = var_148_cast_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
tensor<int32, [4]> var_153 = const()[name = tensor<string, []>("op_153"), val = tensor<int32, [4]>([1, 768, 1, 1])];
tensor<fp16, [1, 768, 1, 1]> input_1_cast_fp16 = reshape(shape = var_153, x = attn_1_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
tensor<string, []> obj_7_pad_type_0 = const()[name = tensor<string, []>("obj_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> obj_7_strides_0 = const()[name = tensor<string, []>("obj_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> obj_7_pad_0 = const()[name = tensor<string, []>("obj_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> obj_7_dilations_0 = const()[name = tensor<string, []>("obj_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> obj_7_groups_0 = const()[name = tensor<string, []>("obj_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83901696)))];
tensor<fp16, [768]> layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85081408)))];
tensor<fp16, [1, 768, 1, 1]> obj_7_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("obj_7_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
tensor<int32, [1]> out_3_axes_0 = const()[name = tensor<string, []>("out_3_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_175_to_fp16 = const()[name = tensor<string, []>("op_175_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_175_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
tensor<fp16, [768]> obj_9_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85083008)))];
tensor<fp16, [768]> obj_9_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_9_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85084608)))];
tensor<fp16, []> obj_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor<string, []>("obj_9_cast_fp16")];
tensor<string, []> query_3_pad_type_0 = const()[name = tensor<string, []>("query_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> query_3_strides_0 = const()[name = tensor<string, []>("query_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_3_pad_0 = const()[name = tensor<string, []>("query_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_3_dilations_0 = const()[name = tensor<string, []>("query_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> query_3_groups_0 = const()[name = tensor<string, []>("query_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85086208)))];
tensor<fp16, [768]> layers_0_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86265920)))];
tensor<fp16, [1, 768, 1, 1]> query_3_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_0_encoder_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
tensor<string, []> key_3_pad_type_0 = const()[name = tensor<string, []>("key_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> key_3_strides_0 = const()[name = tensor<string, []>("key_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_3_pad_0 = const()[name = tensor<string, []>("key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_3_dilations_0 = const()[name = tensor<string, []>("key_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> key_3_groups_0 = const()[name = tensor<string, []>("key_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86267520)))];
tensor<fp16, [1, 768, 1, 1500]> key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_0_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_3_cast_fp16")];
tensor<string, []> value_3_pad_type_0 = const()[name = tensor<string, []>("value_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> value_3_strides_0 = const()[name = tensor<string, []>("value_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_3_pad_0 = const()[name = tensor<string, []>("value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_3_dilations_0 = const()[name = tensor<string, []>("value_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> value_3_groups_0 = const()[name = tensor<string, []>("value_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87447232)))];
tensor<fp16, [768]> layers_0_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88626944)))];
tensor<fp16, [1, 768, 1, 1500]> value_3_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_0_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_3_cast_fp16")];
tensor<int32, [4]> var_211 = const()[name = tensor<string, []>("op_211"), val = tensor<int32, [4]>([1, 12, 64, 1])];
tensor<fp16, [1, 12, 64, 1]> mh_q_3_cast_fp16 = reshape(shape = var_211, x = query_3_cast_fp16)[name = tensor<string, []>("mh_q_3_cast_fp16")];
tensor<fp16, []> var_213_to_fp16 = const()[name = tensor<string, []>("op_213_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 64, 1]> var_214_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_213_to_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
tensor<int32, [4]> var_217 = const()[name = tensor<string, []>("op_217"), val = tensor<int32, [4]>([1, 12, 64, 1500])];
tensor<fp16, [1, 12, 64, 1500]> var_218_cast_fp16 = reshape(shape = var_217, x = key_3_cast_fp16)[name = tensor<string, []>("op_218_cast_fp16")];
tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_214_cast_fp16, y = var_218_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
tensor<fp16, [1, 12, 1, 1500]> var_221_cast_fp16 = softmax(axis = var_60, x = mh_w_5_cast_fp16)[name = tensor<string, []>("op_221_cast_fp16")];
tensor<int32, [4]> var_222 = const()[name = tensor<string, []>("op_222"), val = tensor<int32, [4]>([1, 12, 64, 1500])];
tensor<fp16, [1, 12, 64, 1500]> var_223_cast_fp16 = reshape(shape = var_222, x = value_3_cast_fp16)[name = tensor<string, []>("op_223_cast_fp16")];
tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<fp16, [1, 12, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_223_cast_fp16, y = var_221_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
tensor<int32, [4]> var_226 = const()[name = tensor<string, []>("op_226"), val = tensor<int32, [4]>([1, 768, 1, 1])];
tensor<fp16, [1, 768, 1, 1]> input_3_cast_fp16 = reshape(shape = var_226, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
tensor<string, []> obj_11_pad_type_0 = const()[name = tensor<string, []>("obj_11_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> obj_11_strides_0 = const()[name = tensor<string, []>("obj_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> obj_11_pad_0 = const()[name = tensor<string, []>("obj_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> obj_11_dilations_0 = const()[name = tensor<string, []>("obj_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> obj_11_groups_0 = const()[name = tensor<string, []>("obj_11_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88628544)))];
tensor<fp16, [768]> layers_0_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89808256)))];
tensor<fp16, [1, 768, 1, 1]> obj_11_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_0_encoder_attn_o_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("obj_11_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
tensor<int32, [1]> out_5_axes_0 = const()[name = tensor<string, []>("out_5_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_244_to_fp16 = const()[name = tensor<string, []>("op_244_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_244_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
tensor<fp16, [768]> input_5_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89809856)))];
tensor<fp16, [768]> input_5_beta_0_to_fp16 = const()[name = tensor<string, []>("input_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89811456)))];
tensor<fp16, []> input_5_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_5_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
tensor<string, []> input_7_pad_type_0 = const()[name = tensor<string, []>("input_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> input_7_strides_0 = const()[name = tensor<string, []>("input_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_7_pad_0 = const()[name = tensor<string, []>("input_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_7_dilations_0 = const()[name = tensor<string, []>("input_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> input_7_groups_0 = const()[name = tensor<string, []>("input_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89813056)))];
tensor<fp16, [3072]> layers_0_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94531712)))];
tensor<fp16, [1, 3072, 1, 1]> input_7_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_7_dilations_0, groups = input_7_groups_0, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = input_7_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
tensor<string, []> input_9_mode_0 = const()[name = tensor<string, []>("input_9_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 3072, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
tensor<string, []> hidden_states_3_pad_type_0 = const()[name = tensor<string, []>("hidden_states_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> hidden_states_3_strides_0 = const()[name = tensor<string, []>("hidden_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_3_pad_0 = const()[name = tensor<string, []>("hidden_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_3_dilations_0 = const()[name = tensor<string, []>("hidden_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> hidden_states_3_groups_0 = const()[name = tensor<string, []>("hidden_states_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94537920)))];
tensor<fp16, [768]> layers_0_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_0_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99256576)))];
tensor<fp16, [1, 768, 1, 1]> hidden_states_3_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_3_dilations_0, groups = hidden_states_3_groups_0, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = hidden_states_3_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("hidden_states_3_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
tensor<int32, []> var_279 = const()[name = tensor<string, []>("op_279"), val = tensor<int32, []>(3)];
tensor<int32, [1]> out_7_axes_0 = const()[name = tensor<string, []>("out_7_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_304_to_fp16 = const()[name = tensor<string, []>("op_304_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_304_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
tensor<fp16, [768]> obj_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99258176)))];
tensor<fp16, [768]> obj_13_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99259776)))];
tensor<fp16, []> obj_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> query_5_strides_0 = const()[name = tensor<string, []>("query_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_5_dilations_0 = const()[name = tensor<string, []>("query_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> query_5_groups_0 = const()[name = tensor<string, []>("query_5_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99261376)))];
tensor<fp16, [768]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100441088)))];
tensor<fp16, [1, 768, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
tensor<string, []> current_key_3_pad_type_0 = const()[name = tensor<string, []>("current_key_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> current_key_3_strides_0 = const()[name = tensor<string, []>("current_key_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> current_key_3_pad_0 = const()[name = tensor<string, []>("current_key_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> current_key_3_dilations_0 = const()[name = tensor<string, []>("current_key_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> current_key_3_groups_0 = const()[name = tensor<string, []>("current_key_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100442688)))];
tensor<fp16, [1, 768, 1, 1]> current_key_3_cast_fp16 = conv(dilations = current_key_3_dilations_0, groups = current_key_3_groups_0, pad = current_key_3_pad_0, pad_type = current_key_3_pad_type_0, strides = current_key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("current_key_3_cast_fp16")];
tensor<string, []> current_value_3_pad_type_0 = const()[name = tensor<string, []>("current_value_3_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> current_value_3_strides_0 = const()[name = tensor<string, []>("current_value_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> current_value_3_pad_0 = const()[name = tensor<string, []>("current_value_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> current_value_3_dilations_0 = const()[name = tensor<string, []>("current_value_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> current_value_3_groups_0 = const()[name = tensor<string, []>("current_value_3_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101622400)))];
tensor<fp16, [768]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102802112)))];
tensor<fp16, [1, 768, 1, 1]> current_value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = current_value_3_dilations_0, groups = current_value_3_groups_0, pad = current_value_3_pad_0, pad_type = current_value_3_pad_type_0, strides = current_value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("current_value_3_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_343_cast_fp16 = mul(x = var_45_cast_fp16_1, y = var_123_cast_fp16)[name = tensor<string, []>("op_343_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_344_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_344_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> key_5_cast_fp16 = add(x = var_343_cast_fp16, y = var_344_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_347_cast_fp16 = mul(x = var_51_cast_fp16_1, y = var_123_cast_fp16)[name = tensor<string, []>("op_347_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_348_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> value_5_cast_fp16 = add(x = var_347_cast_fp16, y = var_348_cast_fp16)[name = tensor<string, []>("value_5_cast_fp16")];
tensor<int32, [4]> var_352 = const()[name = tensor<string, []>("op_352"), val = tensor<int32, [4]>([1, 12, 64, 1])];
tensor<fp16, [1, 12, 64, 1]> mh_q_5_cast_fp16 = reshape(shape = var_352, x = query_5_cast_fp16)[name = tensor<string, []>("mh_q_5_cast_fp16")];
tensor<fp16, []> var_354_to_fp16 = const()[name = tensor<string, []>("op_354_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 64, 1]> var_355_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_354_to_fp16)[name = tensor<string, []>("op_355_cast_fp16")];
tensor<int32, [4]> var_358 = const()[name = tensor<string, []>("op_358"), val = tensor<int32, [4]>([1, 12, 64, 448])];
tensor<fp16, [1, 12, 64, 448]> var_359_cast_fp16 = reshape(shape = var_358, x = key_5_cast_fp16)[name = tensor<string, []>("op_359_cast_fp16")];
tensor<bool, []> mh_w_7_transpose_x_0 = const()[name = tensor<string, []>("mh_w_7_transpose_x_0"), val = tensor<bool, []>(true)];
tensor<bool, []> mh_w_7_transpose_y_0 = const()[name = tensor<string, []>("mh_w_7_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 1, 448]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_355_cast_fp16, y = var_359_cast_fp16)[name = tensor<string, []>("mh_w_7_cast_fp16")];
tensor<fp16, [1, 12, 1, 448]> mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_145_cast_fp16)[name = tensor<string, []>("mh_w_9_cast_fp16")];
tensor<fp16, [1, 12, 1, 448]> var_367_cast_fp16 = softmax(axis = var_279, x = mh_w_9_cast_fp16)[name = tensor<string, []>("op_367_cast_fp16")];
tensor<int32, [4]> var_368 = const()[name = tensor<string, []>("op_368"), val = tensor<int32, [4]>([1, 12, 64, 448])];
tensor<fp16, [1, 12, 64, 448]> var_369_cast_fp16 = reshape(shape = var_368, x = value_5_cast_fp16)[name = tensor<string, []>("op_369_cast_fp16")];
tensor<bool, []> attn_5_transpose_x_0 = const()[name = tensor<string, []>("attn_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_5_transpose_y_0 = const()[name = tensor<string, []>("attn_5_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<fp16, [1, 12, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_369_cast_fp16, y = var_367_cast_fp16)[name = tensor<string, []>("attn_5_cast_fp16")];
tensor<int32, [4]> var_372 = const()[name = tensor<string, []>("op_372"), val = tensor<int32, [4]>([1, 768, 1, 1])];
tensor<fp16, [1, 768, 1, 1]> input_11_cast_fp16 = reshape(shape = var_372, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
tensor<string, []> obj_19_pad_type_0 = const()[name = tensor<string, []>("obj_19_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> obj_19_strides_0 = const()[name = tensor<string, []>("obj_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> obj_19_pad_0 = const()[name = tensor<string, []>("obj_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> obj_19_dilations_0 = const()[name = tensor<string, []>("obj_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> obj_19_groups_0 = const()[name = tensor<string, []>("obj_19_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102803712)))];
tensor<fp16, [768]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103983424)))];
tensor<fp16, [1, 768, 1, 1]> obj_19_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_19_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_19_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
tensor<int32, [1]> out_9_axes_0 = const()[name = tensor<string, []>("out_9_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_394_to_fp16 = const()[name = tensor<string, []>("op_394_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_394_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
tensor<fp16, [768]> obj_21_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103985024)))];
tensor<fp16, [768]> obj_21_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_21_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103986624)))];
tensor<fp16, []> obj_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
tensor<string, []> query_7_pad_type_0 = const()[name = tensor<string, []>("query_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> query_7_strides_0 = const()[name = tensor<string, []>("query_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_7_pad_0 = const()[name = tensor<string, []>("query_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_7_dilations_0 = const()[name = tensor<string, []>("query_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> query_7_groups_0 = const()[name = tensor<string, []>("query_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103988224)))];
tensor<fp16, [768]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105167936)))];
tensor<fp16, [1, 768, 1, 1]> query_7_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
tensor<string, []> key_7_pad_type_0 = const()[name = tensor<string, []>("key_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> key_7_strides_0 = const()[name = tensor<string, []>("key_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_7_pad_0 = const()[name = tensor<string, []>("key_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_7_dilations_0 = const()[name = tensor<string, []>("key_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> key_7_groups_0 = const()[name = tensor<string, []>("key_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105169536)))];
tensor<fp16, [1, 768, 1, 1500]> key_7_cast_fp16 = conv(dilations = key_7_dilations_0, groups = key_7_groups_0, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = key_7_strides_0, weight = layers_1_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_7_cast_fp16")];
tensor<string, []> value_7_pad_type_0 = const()[name = tensor<string, []>("value_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> value_7_strides_0 = const()[name = tensor<string, []>("value_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_7_pad_0 = const()[name = tensor<string, []>("value_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_7_dilations_0 = const()[name = tensor<string, []>("value_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> value_7_groups_0 = const()[name = tensor<string, []>("value_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106349248)))];
tensor<fp16, [768]> layers_1_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107528960)))];
tensor<fp16, [1, 768, 1, 1500]> value_7_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_bias_to_fp16, dilations = value_7_dilations_0, groups = value_7_groups_0, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = value_7_strides_0, weight = layers_1_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_7_cast_fp16")];
tensor<int32, [4]> var_430 = const()[name = tensor<string, []>("op_430"), val = tensor<int32, [4]>([1, 12, 64, 1])];
tensor<fp16, [1, 12, 64, 1]> mh_q_7_cast_fp16 = reshape(shape = var_430, x = query_7_cast_fp16)[name = tensor<string, []>("mh_q_7_cast_fp16")];
tensor<fp16, []> var_432_to_fp16 = const()[name = tensor<string, []>("op_432_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 64, 1]> var_433_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_432_to_fp16)[name = tensor<string, []>("op_433_cast_fp16")];
tensor<int32, [4]> var_436 = const()[name = tensor<string, []>("op_436"), val = tensor<int32, [4]>([1, 12, 64, 1500])];
tensor<fp16, [1, 12, 64, 1500]> var_437_cast_fp16 = reshape(shape = var_436, x = key_7_cast_fp16)[name = tensor<string, []>("op_437_cast_fp16")];
tensor<bool, []> mh_w_11_transpose_x_0 = const()[name = tensor<string, []>("mh_w_11_transpose_x_0"), val = tensor<bool, []>(true)];
tensor<bool, []> mh_w_11_transpose_y_0 = const()[name = tensor<string, []>("mh_w_11_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 1, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_433_cast_fp16, y = var_437_cast_fp16)[name = tensor<string, []>("mh_w_11_cast_fp16")];
tensor<fp16, [1, 12, 1, 1500]> var_440_cast_fp16 = softmax(axis = var_279, x = mh_w_11_cast_fp16)[name = tensor<string, []>("op_440_cast_fp16")];
tensor<int32, [4]> var_441 = const()[name = tensor<string, []>("op_441"), val = tensor<int32, [4]>([1, 12, 64, 1500])];
tensor<fp16, [1, 12, 64, 1500]> var_442_cast_fp16 = reshape(shape = var_441, x = value_7_cast_fp16)[name = tensor<string, []>("op_442_cast_fp16")];
tensor<bool, []> attn_7_transpose_x_0 = const()[name = tensor<string, []>("attn_7_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_7_transpose_y_0 = const()[name = tensor<string, []>("attn_7_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<fp16, [1, 12, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_442_cast_fp16, y = var_440_cast_fp16)[name = tensor<string, []>("attn_7_cast_fp16")];
tensor<int32, [4]> var_445 = const()[name = tensor<string, []>("op_445"), val = tensor<int32, [4]>([1, 768, 1, 1])];
tensor<fp16, [1, 768, 1, 1]> input_13_cast_fp16 = reshape(shape = var_445, x = attn_7_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
tensor<string, []> obj_23_pad_type_0 = const()[name = tensor<string, []>("obj_23_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> obj_23_strides_0 = const()[name = tensor<string, []>("obj_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> obj_23_pad_0 = const()[name = tensor<string, []>("obj_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> obj_23_dilations_0 = const()[name = tensor<string, []>("obj_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> obj_23_groups_0 = const()[name = tensor<string, []>("obj_23_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107530560)))];
tensor<fp16, [768]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108710272)))];
tensor<fp16, [1, 768, 1, 1]> obj_23_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = obj_23_dilations_0, groups = obj_23_groups_0, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = obj_23_strides_0, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_23_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
tensor<int32, [1]> out_11_axes_0 = const()[name = tensor<string, []>("out_11_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_463_to_fp16 = const()[name = tensor<string, []>("op_463_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_463_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
tensor<fp16, [768]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108711872)))];
tensor<fp16, [768]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108713472)))];
tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> input_17_strides_0 = const()[name = tensor<string, []>("input_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_17_dilations_0 = const()[name = tensor<string, []>("input_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> input_17_groups_0 = const()[name = tensor<string, []>("input_17_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108715072)))];
tensor<fp16, [3072]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113433728)))];
tensor<fp16, [1, 3072, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 3072, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> hidden_states_5_strides_0 = const()[name = tensor<string, []>("hidden_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_5_dilations_0 = const()[name = tensor<string, []>("hidden_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> hidden_states_5_groups_0 = const()[name = tensor<string, []>("hidden_states_5_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113439936)))];
tensor<fp16, [768]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118158592)))];
tensor<fp16, [1, 768, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
tensor<int32, []> var_498 = const()[name = tensor<string, []>("op_498"), val = tensor<int32, []>(3)];
tensor<int32, [1]> out_13_axes_0 = const()[name = tensor<string, []>("out_13_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_523_to_fp16 = const()[name = tensor<string, []>("op_523_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_523_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
tensor<fp16, [768]> obj_25_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118160192)))];
tensor<fp16, [768]> obj_25_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118161792)))];
tensor<fp16, []> obj_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
tensor<string, []> query_9_pad_type_0 = const()[name = tensor<string, []>("query_9_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> query_9_strides_0 = const()[name = tensor<string, []>("query_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_9_pad_0 = const()[name = tensor<string, []>("query_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_9_dilations_0 = const()[name = tensor<string, []>("query_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> query_9_groups_0 = const()[name = tensor<string, []>("query_9_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118163392)))];
tensor<fp16, [768]> layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119343104)))];
tensor<fp16, [1, 768, 1, 1]> query_9_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_9_dilations_0, groups = query_9_groups_0, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = query_9_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> current_key_strides_0 = const()[name = tensor<string, []>("current_key_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> current_key_dilations_0 = const()[name = tensor<string, []>("current_key_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> current_key_groups_0 = const()[name = tensor<string, []>("current_key_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119344704)))];
tensor<fp16, [1, 768, 1, 1]> current_key_cast_fp16 = conv(dilations = current_key_dilations_0, groups = current_key_groups_0, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = current_key_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> current_value_strides_0 = const()[name = tensor<string, []>("current_value_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> current_value_dilations_0 = const()[name = tensor<string, []>("current_value_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> current_value_groups_0 = const()[name = tensor<string, []>("current_value_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120524416)))];
tensor<fp16, [768]> layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121704128)))];
tensor<fp16, [1, 768, 1, 1]> current_value_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = current_value_dilations_0, groups = current_value_groups_0, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = current_value_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_562_cast_fp16 = mul(x = var_45_cast_fp16_2, y = var_123_cast_fp16)[name = tensor<string, []>("op_562_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_563_cast_fp16 = mul(x = current_key_cast_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_563_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> key_9_cast_fp16 = add(x = var_562_cast_fp16, y = var_563_cast_fp16)[name = tensor<string, []>("key_9_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_566_cast_fp16 = mul(x = var_51_cast_fp16_2, y = var_123_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> var_567_cast_fp16 = mul(x = current_value_cast_fp16, y = var_121_cast_fp16)[name = tensor<string, []>("op_567_cast_fp16")];
tensor<fp16, [1, 768, 1, 448]> value_9_cast_fp16 = add(x = var_566_cast_fp16, y = var_567_cast_fp16)[name = tensor<string, []>("value_9_cast_fp16")];
tensor<int32, [4]> var_571 = const()[name = tensor<string, []>("op_571"), val = tensor<int32, [4]>([1, 12, 64, 1])];
tensor<fp16, [1, 12, 64, 1]> mh_q_9_cast_fp16 = reshape(shape = var_571, x = query_9_cast_fp16)[name = tensor<string, []>("mh_q_9_cast_fp16")];
tensor<fp16, []> var_573_to_fp16 = const()[name = tensor<string, []>("op_573_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 64, 1]> var_574_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_573_to_fp16)[name = tensor<string, []>("op_574_cast_fp16")];
tensor<int32, [4]> var_577 = const()[name = tensor<string, []>("op_577"), val = tensor<int32, [4]>([1, 12, 64, 448])];
tensor<fp16, [1, 12, 64, 448]> var_578_cast_fp16 = reshape(shape = var_577, x = key_9_cast_fp16)[name = tensor<string, []>("op_578_cast_fp16")];
tensor<bool, []> mh_w_13_transpose_x_0 = const()[name = tensor<string, []>("mh_w_13_transpose_x_0"), val = tensor<bool, []>(true)];
tensor<bool, []> mh_w_13_transpose_y_0 = const()[name = tensor<string, []>("mh_w_13_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 1, 448]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_574_cast_fp16, y = var_578_cast_fp16)[name = tensor<string, []>("mh_w_13_cast_fp16")];
tensor<fp16, [1, 12, 1, 448]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_145_cast_fp16)[name = tensor<string, []>("mh_w_15_cast_fp16")];
tensor<fp16, [1, 12, 1, 448]> var_586_cast_fp16 = softmax(axis = var_498, x = mh_w_15_cast_fp16)[name = tensor<string, []>("op_586_cast_fp16")];
tensor<int32, [4]> var_587 = const()[name = tensor<string, []>("op_587"), val = tensor<int32, [4]>([1, 12, 64, 448])];
tensor<fp16, [1, 12, 64, 448]> var_588_cast_fp16 = reshape(shape = var_587, x = value_9_cast_fp16)[name = tensor<string, []>("op_588_cast_fp16")];
tensor<bool, []> attn_9_transpose_x_0 = const()[name = tensor<string, []>("attn_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_9_transpose_y_0 = const()[name = tensor<string, []>("attn_9_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<fp16, [1, 12, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_588_cast_fp16, y = var_586_cast_fp16)[name = tensor<string, []>("attn_9_cast_fp16")];
tensor<int32, [4]> var_591 = const()[name = tensor<string, []>("op_591"), val = tensor<int32, [4]>([1, 768, 1, 1])];
tensor<fp16, [1, 768, 1, 1]> input_21_cast_fp16 = reshape(shape = var_591, x = attn_9_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
tensor<string, []> obj_31_pad_type_0 = const()[name = tensor<string, []>("obj_31_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> obj_31_strides_0 = const()[name = tensor<string, []>("obj_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> obj_31_pad_0 = const()[name = tensor<string, []>("obj_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> obj_31_dilations_0 = const()[name = tensor<string, []>("obj_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> obj_31_groups_0 = const()[name = tensor<string, []>("obj_31_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121705728)))];
tensor<fp16, [768]> layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122885440)))];
tensor<fp16, [1, 768, 1, 1]> obj_31_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_31_dilations_0, groups = obj_31_groups_0, pad = obj_31_pad_0, pad_type = obj_31_pad_type_0, strides = obj_31_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("obj_31_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_31_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
tensor<int32, [1]> out_15_axes_0 = const()[name = tensor<string, []>("out_15_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_613_to_fp16 = const()[name = tensor<string, []>("op_613_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_613_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
tensor<fp16, [768]> obj_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122887040)))];
tensor<fp16, [768]> obj_33_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122888640)))];
tensor<fp16, []> obj_33_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_33_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor<string, []>("obj_33_cast_fp16")];
tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> query_strides_0 = const()[name = tensor<string, []>("query_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_dilations_0 = const()[name = tensor<string, []>("query_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> query_groups_0 = const()[name = tensor<string, []>("query_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122890240)))];
tensor<fp16, [768]> layers_2_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124069952)))];
tensor<fp16, [1, 768, 1, 1]> query_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_2_encoder_attn_q_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> key_strides_0 = const()[name = tensor<string, []>("key_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_pad_0 = const()[name = tensor<string, []>("key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_dilations_0 = const()[name = tensor<string, []>("key_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> key_groups_0 = const()[name = tensor<string, []>("key_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124071552)))];
tensor<fp16, [1, 768, 1, 1500]> key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_2_encoder_attn_k_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("key_cast_fp16")];
tensor<string, []> value_pad_type_0 = const()[name = tensor<string, []>("value_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> value_strides_0 = const()[name = tensor<string, []>("value_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_pad_0 = const()[name = tensor<string, []>("value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_dilations_0 = const()[name = tensor<string, []>("value_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> value_groups_0 = const()[name = tensor<string, []>("value_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(125251264)))];
tensor<fp16, [768]> layers_2_encoder_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126430976)))];
tensor<fp16, [1, 768, 1, 1500]> value_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_2_encoder_attn_v_proj_weight_to_fp16, x = encoder_output_embeds)[name = tensor<string, []>("value_cast_fp16")];
tensor<int32, [4]> var_649 = const()[name = tensor<string, []>("op_649"), val = tensor<int32, [4]>([1, 12, 64, 1])];
tensor<fp16, [1, 12, 64, 1]> mh_q_cast_fp16 = reshape(shape = var_649, x = query_cast_fp16)[name = tensor<string, []>("mh_q_cast_fp16")];
tensor<fp16, []> var_651_to_fp16 = const()[name = tensor<string, []>("op_651_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 64, 1]> var_652_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_651_to_fp16)[name = tensor<string, []>("op_652_cast_fp16")];
tensor<int32, [4]> var_655 = const()[name = tensor<string, []>("op_655"), val = tensor<int32, [4]>([1, 12, 64, 1500])];
tensor<fp16, [1, 12, 64, 1500]> var_656_cast_fp16 = reshape(shape = var_655, x = key_cast_fp16)[name = tensor<string, []>("op_656_cast_fp16")];
tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_652_cast_fp16, y = var_656_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
tensor<fp16, [1, 12, 1, 1500]> var_659_cast_fp16 = softmax(axis = var_498, x = mh_w_cast_fp16)[name = tensor<string, []>("op_659_cast_fp16")];
tensor<int32, [4]> var_660 = const()[name = tensor<string, []>("op_660"), val = tensor<int32, [4]>([1, 12, 64, 1500])];
tensor<fp16, [1, 12, 64, 1500]> var_661_cast_fp16 = reshape(shape = var_660, x = value_cast_fp16)[name = tensor<string, []>("op_661_cast_fp16")];
tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<fp16, [1, 12, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_661_cast_fp16, y = var_659_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
tensor<int32, [4]> var_664 = const()[name = tensor<string, []>("op_664"), val = tensor<int32, [4]>([1, 768, 1, 1])];
tensor<fp16, [1, 768, 1, 1]> input_23_cast_fp16 = reshape(shape = var_664, x = attn_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
tensor<string, []> obj_35_pad_type_0 = const()[name = tensor<string, []>("obj_35_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> obj_35_strides_0 = const()[name = tensor<string, []>("obj_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> obj_35_pad_0 = const()[name = tensor<string, []>("obj_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> obj_35_dilations_0 = const()[name = tensor<string, []>("obj_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> obj_35_groups_0 = const()[name = tensor<string, []>("obj_35_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126432576)))];
tensor<fp16, [768]> layers_2_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127612288)))];
tensor<fp16, [1, 768, 1, 1]> obj_35_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_2_encoder_attn_o_proj_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("obj_35_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_35_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
tensor<int32, [1]> out_17_axes_0 = const()[name = tensor<string, []>("out_17_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_682_to_fp16 = const()[name = tensor<string, []>("op_682_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_682_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("out_17_cast_fp16")];
tensor<fp16, [768]> input_25_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127613888)))];
tensor<fp16, [768]> input_25_beta_0_to_fp16 = const()[name = tensor<string, []>("input_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127615488)))];
tensor<fp16, []> input_25_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_25_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
tensor<string, []> input_27_pad_type_0 = const()[name = tensor<string, []>("input_27_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> input_27_strides_0 = const()[name = tensor<string, []>("input_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_27_pad_0 = const()[name = tensor<string, []>("input_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_27_dilations_0 = const()[name = tensor<string, []>("input_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> input_27_groups_0 = const()[name = tensor<string, []>("input_27_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127617088)))];
tensor<fp16, [3072]> layers_2_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132335744)))];
tensor<fp16, [1, 3072, 1, 1]> input_27_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 3072, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_27_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
tensor<string, []> hidden_states_7_pad_type_0 = const()[name = tensor<string, []>("hidden_states_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = tensor<string, []>("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = tensor<string, []>("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = tensor<string, []>("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, []> hidden_states_7_groups_0 = const()[name = tensor<string, []>("hidden_states_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132341952)))];
tensor<fp16, [768]> layers_2_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_2_fc2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137060608)))];
tensor<fp16, [1, 768, 1, 1]> hidden_states_7_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
tensor<fp16, [1, 768, 1, 1]> inputs_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
tensor<int32, [1]> out_axes_0 = const()[name = tensor<string, []>("out_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, []> var_724_to_fp16 = const()[name = tensor<string, []>("op_724_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_724_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
tensor<fp16, [768]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137062208)))];
tensor<fp16, [768]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137063808)))];
tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 768, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
tensor<int32, [1]> var_735_axes_0 = const()[name = tensor<string, []>("op_735_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 768, 1]> var_735_cast_fp16 = squeeze(axes = var_735_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_735_cast_fp16")];
tensor<int32, [3]> var_738_perm_0 = const()[name = tensor<string, []>("op_738_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [51865]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137065408)))];
tensor<fp16, [1, 1, 768]> var_738_cast_fp16 = transpose(perm = var_738_perm_0, x = var_735_cast_fp16)[name = tensor<string, []>("transpose_0")];
tensor<fp16, [1, 1, 51865]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_738_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
tensor<int32, []> var_742 = const()[name = tensor<string, []>("op_742"), val = tensor<int32, []>(1)];
tensor<bool, []> obj_39_interleave_0 = const()[name = tensor<string, []>("obj_39_interleave_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 2304, 1, 1]> key_cache_updates = concat(axis = var_742, interleave = obj_39_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_39_cast_fp16")];
tensor<int32, []> var_745 = const()[name = tensor<string, []>("op_745"), val = tensor<int32, []>(1)];
tensor<bool, []> obj_interleave_0 = const()[name = tensor<string, []>("obj_interleave_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 2304, 1, 1]> value_cache_updates = concat(axis = var_745, interleave = obj_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_cast_fp16")];
} -> (logits, key_cache_updates, value_cache_updates);
}