philippdxx's picture
Upload folder using huggingface_hub
0f217f2 verified
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.7.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
{
func main<ios15>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> input_ids) {
tensor<int32, []> inputs_embeds_axis_0 = const()[name = tensor<string, []>("inputs_embeds_axis_0"), val = tensor<int32, []>(0)];
tensor<fp16, [28996, 768]> model_bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [28996, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
tensor<fp16, [1, 128, 768]> inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, indices = input_ids, x = model_bert_embeddings_word_embeddings_weight_to_fp16)[name = tensor<string, []>("inputs_embeds_cast_fp16")];
tensor<fp16, [1, 128, 768]> token_type_embeddings_1_to_fp16 = const()[name = tensor<string, []>("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44537984)))];
tensor<fp16, [1, 128, 768]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = token_type_embeddings_1_to_fp16)[name = tensor<string, []>("embeddings_1_cast_fp16")];
tensor<fp16, [1, 128, 768]> position_embeddings_1_to_fp16 = const()[name = tensor<string, []>("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44734656)))];
tensor<fp16, [1, 128, 768]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44931328)))];
tensor<fp16, [768]> model_bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44932928)))];
tensor<fp16, []> var_10_to_fp16 = const()[name = tensor<string, []>("op_10_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
tensor<fp16, [1, 128, 768]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = model_bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
tensor<int32, [1]> var_65_axes_0 = const()[name = tensor<string, []>("op_65_axes_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [1, 1, 128]> var_65 = expand_dims(axes = var_65_axes_0, x = attention_mask)[name = tensor<string, []>("op_65")];
tensor<int32, [1]> var_66_axes_0 = const()[name = tensor<string, []>("op_66_axes_0"), val = tensor<int32, [1]>([2])];
tensor<int32, [1, 1, 1, 128]> var_66 = expand_dims(axes = var_66_axes_0, x = var_65)[name = tensor<string, []>("op_66")];
tensor<int32, [4]> var_69_reps_0 = const()[name = tensor<string, []>("op_69_reps_0"), val = tensor<int32, [4]>([1, 1, 128, 1])];
tensor<int32, [1, 1, 128, 128]> var_69 = tile(reps = var_69_reps_0, x = var_66)[name = tensor<string, []>("op_69")];
tensor<fp16, []> const_5_to_fp16 = const()[name = tensor<string, []>("const_5_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<string, []> cast_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("cast_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [1, 1, 128, 128]> var_69_to_fp16 = cast(dtype = cast_3_to_fp16_dtype_0, x = var_69)[name = tensor<string, []>("cast_55")];
tensor<fp16, [1, 1, 128, 128]> inverted_mask_cast_fp16 = sub(x = const_5_to_fp16, y = var_69_to_fp16)[name = tensor<string, []>("inverted_mask_cast_fp16")];
tensor<string, []> cast_4_dtype_0 = const()[name = tensor<string, []>("cast_4_dtype_0"), val = tensor<string, []>("bool")];
tensor<fp16, []> var_20_to_fp16 = const()[name = tensor<string, []>("op_20_to_fp16"), val = tensor<fp16, []>(-inf)];
tensor<bool, [1, 1, 128, 128]> inverted_mask_cast_fp16_to_bool = cast(dtype = cast_4_dtype_0, x = inverted_mask_cast_fp16)[name = tensor<string, []>("cast_54")];
tensor<fp16, [1, 1, 128, 128]> attention_mask_cast_fp16 = select(a = var_20_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = tensor<string, []>("attention_mask_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_0_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44934528)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46114240)))];
tensor<fp16, [1, 128, 768]> linear_0_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_self_query_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
tensor<int32, [4]> var_115 = const()[name = tensor<string, []>("op_115"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_3_cast_fp16 = reshape(shape = var_115, x = linear_0_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_0_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46115840)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47295552)))];
tensor<fp16, [1, 128, 768]> linear_1_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_self_key_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
tensor<int32, [4]> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_7_cast_fp16 = reshape(shape = var_124, x = linear_1_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_0_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47297152)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48476864)))];
tensor<fp16, [1, 128, 768]> linear_2_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_self_value_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
tensor<int32, [4]> var_133 = const()[name = tensor<string, []>("op_133"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_11_cast_fp16 = reshape(shape = var_133, x = linear_2_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
tensor<int32, [4]> var_135 = const()[name = tensor<string, []>("op_135"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_0_y_0_to_fp16 = const()[name = tensor<string, []>("mul_0_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_0_cast_fp16 = mul(x = x_3_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor<string, []>("mul_0_cast_fp16")];
tensor<bool, []> matmul_0_transpose_y_0 = const()[name = tensor<string, []>("matmul_0_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_0_transpose_x_0 = const()[name = tensor<string, []>("matmul_0_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_48_perm_0 = const()[name = tensor<string, []>("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_49_perm_0 = const()[name = tensor<string, []>("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_49 = transpose(perm = transpose_49_perm_0, x = x_7_cast_fp16)[name = tensor<string, []>("transpose_117")];
tensor<fp16, [1, 12, 128, 64]> transpose_48 = transpose(perm = transpose_48_perm_0, x = mul_0_cast_fp16)[name = tensor<string, []>("transpose_118")];
tensor<fp16, [1, 12, 128, 128]> matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = transpose_48, y = transpose_49)[name = tensor<string, []>("matmul_0_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_0_cast_fp16 = add(x = matmul_0_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_0_cast_fp16")];
tensor<int32, []> softmax_0_axis_0 = const()[name = tensor<string, []>("softmax_0_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_0_cast_fp16 = softmax(axis = softmax_0_axis_0, x = add_0_cast_fp16)[name = tensor<string, []>("softmax_0_cast_fp16")];
tensor<bool, []> attn_output_1_transpose_x_0 = const()[name = tensor<string, []>("attn_output_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_1_transpose_y_0 = const()[name = tensor<string, []>("attn_output_1_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_1_cast_fp16 = transpose(perm = var_135, x = x_11_cast_fp16)[name = tensor<string, []>("transpose_119")];
tensor<fp16, [1, 12, 128, 64]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = softmax_0_cast_fp16, y = value_layer_1_cast_fp16)[name = tensor<string, []>("attn_output_1_cast_fp16")];
tensor<int32, [4]> attn_output_3_perm_0 = const()[name = tensor<string, []>("attn_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_139 = const()[name = tensor<string, []>("op_139"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_3_cast_fp16 = transpose(perm = attn_output_3_perm_0, x = attn_output_1_cast_fp16)[name = tensor<string, []>("transpose_116")];
tensor<fp16, [1, 128, 768]> input_9_cast_fp16 = reshape(shape = var_139, x = attn_output_3_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_0_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48478464)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49658176)))];
tensor<fp16, [1, 128, 768]> linear_3_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_output_dense_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_7_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
tensor<int32, [1]> input_15_axes_0 = const()[name = tensor<string, []>("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49659776)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49661376)))];
tensor<fp16, [1, 128, 768]> input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = model_bert_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_0_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49662976)))];
tensor<fp16, [3072]> model_bert_encoder_layer_0_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54381632)))];
tensor<fp16, [1, 128, 3072]> linear_4_cast_fp16 = linear(bias = model_bert_encoder_layer_0_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_0_intermediate_dense_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_0_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54387840)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59106496)))];
tensor<fp16, [1, 128, 768]> linear_5_cast_fp16 = linear(bias = model_bert_encoder_layer_0_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_0_output_dense_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = tensor<string, []>("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_0_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59108096)))];
tensor<fp16, [768]> model_bert_encoder_layer_0_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_0_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59109696)))];
tensor<fp16, [1, 128, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = model_bert_encoder_layer_0_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_0_output_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("hidden_states_7_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_1_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59111296)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60291008)))];
tensor<fp16, [1, 128, 768]> linear_6_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_self_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
tensor<int32, [4]> var_183 = const()[name = tensor<string, []>("op_183"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_15_cast_fp16 = reshape(shape = var_183, x = linear_6_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_1_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60292608)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61472320)))];
tensor<fp16, [1, 128, 768]> linear_7_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_self_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
tensor<int32, [4]> var_192 = const()[name = tensor<string, []>("op_192"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_19_cast_fp16 = reshape(shape = var_192, x = linear_7_cast_fp16)[name = tensor<string, []>("x_19_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_1_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61473920)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62653632)))];
tensor<fp16, [1, 128, 768]> linear_8_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_self_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
tensor<int32, [4]> var_201 = const()[name = tensor<string, []>("op_201"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_23_cast_fp16 = reshape(shape = var_201, x = linear_8_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")];
tensor<int32, [4]> var_203 = const()[name = tensor<string, []>("op_203"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_1_y_0_to_fp16 = const()[name = tensor<string, []>("mul_1_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_1_cast_fp16 = mul(x = x_15_cast_fp16, y = mul_1_y_0_to_fp16)[name = tensor<string, []>("mul_1_cast_fp16")];
tensor<bool, []> matmul_1_transpose_y_0 = const()[name = tensor<string, []>("matmul_1_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_1_transpose_x_0 = const()[name = tensor<string, []>("matmul_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_50_perm_0 = const()[name = tensor<string, []>("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_51_perm_0 = const()[name = tensor<string, []>("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_51 = transpose(perm = transpose_51_perm_0, x = x_19_cast_fp16)[name = tensor<string, []>("transpose_113")];
tensor<fp16, [1, 12, 128, 64]> transpose_50 = transpose(perm = transpose_50_perm_0, x = mul_1_cast_fp16)[name = tensor<string, []>("transpose_114")];
tensor<fp16, [1, 12, 128, 128]> matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = transpose_50, y = transpose_51)[name = tensor<string, []>("matmul_1_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_1_cast_fp16 = add(x = matmul_1_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_1_cast_fp16")];
tensor<int32, []> softmax_1_axis_0 = const()[name = tensor<string, []>("softmax_1_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_1_cast_fp16 = softmax(axis = softmax_1_axis_0, x = add_1_cast_fp16)[name = tensor<string, []>("softmax_1_cast_fp16")];
tensor<bool, []> attn_output_5_transpose_x_0 = const()[name = tensor<string, []>("attn_output_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_5_transpose_y_0 = const()[name = tensor<string, []>("attn_output_5_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_3_cast_fp16 = transpose(perm = var_203, x = x_23_cast_fp16)[name = tensor<string, []>("transpose_115")];
tensor<fp16, [1, 12, 128, 64]> attn_output_5_cast_fp16 = matmul(transpose_x = attn_output_5_transpose_x_0, transpose_y = attn_output_5_transpose_y_0, x = softmax_1_cast_fp16, y = value_layer_3_cast_fp16)[name = tensor<string, []>("attn_output_5_cast_fp16")];
tensor<int32, [4]> attn_output_7_perm_0 = const()[name = tensor<string, []>("attn_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_207 = const()[name = tensor<string, []>("op_207"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_7_cast_fp16 = transpose(perm = attn_output_7_perm_0, x = attn_output_5_cast_fp16)[name = tensor<string, []>("transpose_112")];
tensor<fp16, [1, 128, 768]> input_25_cast_fp16 = reshape(shape = var_207, x = attn_output_7_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_1_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62655232)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63834944)))];
tensor<fp16, [1, 128, 768]> linear_9_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_output_dense_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_29_cast_fp16 = add(x = linear_9_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
tensor<int32, [1]> input_31_axes_0 = const()[name = tensor<string, []>("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63836544)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63838144)))];
tensor<fp16, [1, 128, 768]> input_31_cast_fp16 = layer_norm(axes = input_31_axes_0, beta = model_bert_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_1_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63839744)))];
tensor<fp16, [3072]> model_bert_encoder_layer_1_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68558400)))];
tensor<fp16, [1, 128, 3072]> linear_10_cast_fp16 = linear(bias = model_bert_encoder_layer_1_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_1_intermediate_dense_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
tensor<string, []> input_35_mode_0 = const()[name = tensor<string, []>("input_35_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_35_cast_fp16 = gelu(mode = input_35_mode_0, x = linear_10_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_1_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68564608)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73283264)))];
tensor<fp16, [1, 128, 768]> linear_11_cast_fp16 = linear(bias = model_bert_encoder_layer_1_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_1_output_dense_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_39_cast_fp16 = add(x = linear_11_cast_fp16, y = input_31_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = tensor<string, []>("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_1_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73284864)))];
tensor<fp16, [768]> model_bert_encoder_layer_1_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_1_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73286464)))];
tensor<fp16, [1, 128, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = model_bert_encoder_layer_1_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_1_output_LayerNorm_weight_to_fp16, x = input_39_cast_fp16)[name = tensor<string, []>("hidden_states_13_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_2_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73288064)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74467776)))];
tensor<fp16, [1, 128, 768]> linear_12_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_self_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
tensor<int32, [4]> var_251 = const()[name = tensor<string, []>("op_251"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_27_cast_fp16 = reshape(shape = var_251, x = linear_12_cast_fp16)[name = tensor<string, []>("x_27_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_2_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74469376)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75649088)))];
tensor<fp16, [1, 128, 768]> linear_13_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_self_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
tensor<int32, [4]> var_260 = const()[name = tensor<string, []>("op_260"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_31_cast_fp16 = reshape(shape = var_260, x = linear_13_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_2_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75650688)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76830400)))];
tensor<fp16, [1, 128, 768]> linear_14_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_self_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
tensor<int32, [4]> var_269 = const()[name = tensor<string, []>("op_269"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_35_cast_fp16 = reshape(shape = var_269, x = linear_14_cast_fp16)[name = tensor<string, []>("x_35_cast_fp16")];
tensor<int32, [4]> var_271 = const()[name = tensor<string, []>("op_271"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_2_y_0_to_fp16 = const()[name = tensor<string, []>("mul_2_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_2_cast_fp16 = mul(x = x_27_cast_fp16, y = mul_2_y_0_to_fp16)[name = tensor<string, []>("mul_2_cast_fp16")];
tensor<bool, []> matmul_2_transpose_y_0 = const()[name = tensor<string, []>("matmul_2_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_2_transpose_x_0 = const()[name = tensor<string, []>("matmul_2_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_52_perm_0 = const()[name = tensor<string, []>("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_53_perm_0 = const()[name = tensor<string, []>("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_53 = transpose(perm = transpose_53_perm_0, x = x_31_cast_fp16)[name = tensor<string, []>("transpose_109")];
tensor<fp16, [1, 12, 128, 64]> transpose_52 = transpose(perm = transpose_52_perm_0, x = mul_2_cast_fp16)[name = tensor<string, []>("transpose_110")];
tensor<fp16, [1, 12, 128, 128]> matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = transpose_52, y = transpose_53)[name = tensor<string, []>("matmul_2_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_2_cast_fp16 = add(x = matmul_2_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_2_cast_fp16")];
tensor<int32, []> softmax_2_axis_0 = const()[name = tensor<string, []>("softmax_2_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_2_cast_fp16 = softmax(axis = softmax_2_axis_0, x = add_2_cast_fp16)[name = tensor<string, []>("softmax_2_cast_fp16")];
tensor<bool, []> attn_output_9_transpose_x_0 = const()[name = tensor<string, []>("attn_output_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_9_transpose_y_0 = const()[name = tensor<string, []>("attn_output_9_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_5_cast_fp16 = transpose(perm = var_271, x = x_35_cast_fp16)[name = tensor<string, []>("transpose_111")];
tensor<fp16, [1, 12, 128, 64]> attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = softmax_2_cast_fp16, y = value_layer_5_cast_fp16)[name = tensor<string, []>("attn_output_9_cast_fp16")];
tensor<int32, [4]> attn_output_11_perm_0 = const()[name = tensor<string, []>("attn_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_275 = const()[name = tensor<string, []>("op_275"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_11_cast_fp16 = transpose(perm = attn_output_11_perm_0, x = attn_output_9_cast_fp16)[name = tensor<string, []>("transpose_108")];
tensor<fp16, [1, 128, 768]> input_41_cast_fp16 = reshape(shape = var_275, x = attn_output_11_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_2_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76832000)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78011712)))];
tensor<fp16, [1, 128, 768]> linear_15_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_output_dense_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_45_cast_fp16 = add(x = linear_15_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78013312)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78014912)))];
tensor<fp16, [1, 128, 768]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = model_bert_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_2_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78016512)))];
tensor<fp16, [3072]> model_bert_encoder_layer_2_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82735168)))];
tensor<fp16, [1, 128, 3072]> linear_16_cast_fp16 = linear(bias = model_bert_encoder_layer_2_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_2_intermediate_dense_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = linear_16_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_2_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82741376)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87460032)))];
tensor<fp16, [1, 128, 768]> linear_17_cast_fp16 = linear(bias = model_bert_encoder_layer_2_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_2_output_dense_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_55_cast_fp16 = add(x = linear_17_cast_fp16, y = input_47_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = tensor<string, []>("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_2_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87461632)))];
tensor<fp16, [768]> model_bert_encoder_layer_2_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_2_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87463232)))];
tensor<fp16, [1, 128, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = model_bert_encoder_layer_2_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_2_output_LayerNorm_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("hidden_states_19_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_3_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87464832)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88644544)))];
tensor<fp16, [1, 128, 768]> linear_18_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_self_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
tensor<int32, [4]> var_319 = const()[name = tensor<string, []>("op_319"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_39_cast_fp16 = reshape(shape = var_319, x = linear_18_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_3_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88646144)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89825856)))];
tensor<fp16, [1, 128, 768]> linear_19_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_self_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
tensor<int32, [4]> var_328 = const()[name = tensor<string, []>("op_328"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_43_cast_fp16 = reshape(shape = var_328, x = linear_19_cast_fp16)[name = tensor<string, []>("x_43_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_3_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89827456)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91007168)))];
tensor<fp16, [1, 128, 768]> linear_20_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_self_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
tensor<int32, [4]> var_337 = const()[name = tensor<string, []>("op_337"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_47_cast_fp16 = reshape(shape = var_337, x = linear_20_cast_fp16)[name = tensor<string, []>("x_47_cast_fp16")];
tensor<int32, [4]> var_339 = const()[name = tensor<string, []>("op_339"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_3_y_0_to_fp16 = const()[name = tensor<string, []>("mul_3_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_3_cast_fp16 = mul(x = x_39_cast_fp16, y = mul_3_y_0_to_fp16)[name = tensor<string, []>("mul_3_cast_fp16")];
tensor<bool, []> matmul_3_transpose_y_0 = const()[name = tensor<string, []>("matmul_3_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_3_transpose_x_0 = const()[name = tensor<string, []>("matmul_3_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_54_perm_0 = const()[name = tensor<string, []>("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_55_perm_0 = const()[name = tensor<string, []>("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_55 = transpose(perm = transpose_55_perm_0, x = x_43_cast_fp16)[name = tensor<string, []>("transpose_105")];
tensor<fp16, [1, 12, 128, 64]> transpose_54 = transpose(perm = transpose_54_perm_0, x = mul_3_cast_fp16)[name = tensor<string, []>("transpose_106")];
tensor<fp16, [1, 12, 128, 128]> matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = transpose_54, y = transpose_55)[name = tensor<string, []>("matmul_3_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_3_cast_fp16 = add(x = matmul_3_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_3_cast_fp16")];
tensor<int32, []> softmax_3_axis_0 = const()[name = tensor<string, []>("softmax_3_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_3_cast_fp16 = softmax(axis = softmax_3_axis_0, x = add_3_cast_fp16)[name = tensor<string, []>("softmax_3_cast_fp16")];
tensor<bool, []> attn_output_13_transpose_x_0 = const()[name = tensor<string, []>("attn_output_13_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_13_transpose_y_0 = const()[name = tensor<string, []>("attn_output_13_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_7_cast_fp16 = transpose(perm = var_339, x = x_47_cast_fp16)[name = tensor<string, []>("transpose_107")];
tensor<fp16, [1, 12, 128, 64]> attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = softmax_3_cast_fp16, y = value_layer_7_cast_fp16)[name = tensor<string, []>("attn_output_13_cast_fp16")];
tensor<int32, [4]> attn_output_15_perm_0 = const()[name = tensor<string, []>("attn_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_343 = const()[name = tensor<string, []>("op_343"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_15_cast_fp16 = transpose(perm = attn_output_15_perm_0, x = attn_output_13_cast_fp16)[name = tensor<string, []>("transpose_104")];
tensor<fp16, [1, 128, 768]> input_57_cast_fp16 = reshape(shape = var_343, x = attn_output_15_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_3_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91008768)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92188480)))];
tensor<fp16, [1, 128, 768]> linear_21_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_output_dense_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_61_cast_fp16 = add(x = linear_21_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92190080)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92191680)))];
tensor<fp16, [1, 128, 768]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = model_bert_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_3_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92193280)))];
tensor<fp16, [3072]> model_bert_encoder_layer_3_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96911936)))];
tensor<fp16, [1, 128, 3072]> linear_22_cast_fp16 = linear(bias = model_bert_encoder_layer_3_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_3_intermediate_dense_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
tensor<string, []> input_67_mode_0 = const()[name = tensor<string, []>("input_67_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_67_cast_fp16 = gelu(mode = input_67_mode_0, x = linear_22_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_3_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96918144)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101636800)))];
tensor<fp16, [1, 128, 768]> linear_23_cast_fp16 = linear(bias = model_bert_encoder_layer_3_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_3_output_dense_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_71_cast_fp16 = add(x = linear_23_cast_fp16, y = input_63_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
tensor<int32, [1]> hidden_states_25_axes_0 = const()[name = tensor<string, []>("hidden_states_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_3_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101638400)))];
tensor<fp16, [768]> model_bert_encoder_layer_3_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_3_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101640000)))];
tensor<fp16, [1, 128, 768]> hidden_states_25_cast_fp16 = layer_norm(axes = hidden_states_25_axes_0, beta = model_bert_encoder_layer_3_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_3_output_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("hidden_states_25_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_4_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101641600)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102821312)))];
tensor<fp16, [1, 128, 768]> linear_24_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_self_query_weight_to_fp16, x = hidden_states_25_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
tensor<int32, [4]> var_387 = const()[name = tensor<string, []>("op_387"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_51_cast_fp16 = reshape(shape = var_387, x = linear_24_cast_fp16)[name = tensor<string, []>("x_51_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_4_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102822912)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(104002624)))];
tensor<fp16, [1, 128, 768]> linear_25_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_self_key_weight_to_fp16, x = hidden_states_25_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
tensor<int32, [4]> var_396 = const()[name = tensor<string, []>("op_396"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_55_cast_fp16 = reshape(shape = var_396, x = linear_25_cast_fp16)[name = tensor<string, []>("x_55_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_4_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(104004224)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105183936)))];
tensor<fp16, [1, 128, 768]> linear_26_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_self_value_weight_to_fp16, x = hidden_states_25_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
tensor<int32, [4]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_59_cast_fp16 = reshape(shape = var_405, x = linear_26_cast_fp16)[name = tensor<string, []>("x_59_cast_fp16")];
tensor<int32, [4]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_4_y_0_to_fp16 = const()[name = tensor<string, []>("mul_4_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_4_cast_fp16 = mul(x = x_51_cast_fp16, y = mul_4_y_0_to_fp16)[name = tensor<string, []>("mul_4_cast_fp16")];
tensor<bool, []> matmul_4_transpose_y_0 = const()[name = tensor<string, []>("matmul_4_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_4_transpose_x_0 = const()[name = tensor<string, []>("matmul_4_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_56_perm_0 = const()[name = tensor<string, []>("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_57_perm_0 = const()[name = tensor<string, []>("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_57 = transpose(perm = transpose_57_perm_0, x = x_55_cast_fp16)[name = tensor<string, []>("transpose_101")];
tensor<fp16, [1, 12, 128, 64]> transpose_56 = transpose(perm = transpose_56_perm_0, x = mul_4_cast_fp16)[name = tensor<string, []>("transpose_102")];
tensor<fp16, [1, 12, 128, 128]> matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = transpose_56, y = transpose_57)[name = tensor<string, []>("matmul_4_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_4_cast_fp16 = add(x = matmul_4_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_4_cast_fp16")];
tensor<int32, []> softmax_4_axis_0 = const()[name = tensor<string, []>("softmax_4_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_4_cast_fp16 = softmax(axis = softmax_4_axis_0, x = add_4_cast_fp16)[name = tensor<string, []>("softmax_4_cast_fp16")];
tensor<bool, []> attn_output_17_transpose_x_0 = const()[name = tensor<string, []>("attn_output_17_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_17_transpose_y_0 = const()[name = tensor<string, []>("attn_output_17_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_9_cast_fp16 = transpose(perm = var_407, x = x_59_cast_fp16)[name = tensor<string, []>("transpose_103")];
tensor<fp16, [1, 12, 128, 64]> attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = softmax_4_cast_fp16, y = value_layer_9_cast_fp16)[name = tensor<string, []>("attn_output_17_cast_fp16")];
tensor<int32, [4]> attn_output_19_perm_0 = const()[name = tensor<string, []>("attn_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_411 = const()[name = tensor<string, []>("op_411"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_19_cast_fp16 = transpose(perm = attn_output_19_perm_0, x = attn_output_17_cast_fp16)[name = tensor<string, []>("transpose_100")];
tensor<fp16, [1, 128, 768]> input_73_cast_fp16 = reshape(shape = var_411, x = attn_output_19_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_4_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105185536)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106365248)))];
tensor<fp16, [1, 128, 768]> linear_27_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_output_dense_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_77_cast_fp16 = add(x = linear_27_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
tensor<int32, [1]> input_79_axes_0 = const()[name = tensor<string, []>("input_79_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106366848)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106368448)))];
tensor<fp16, [1, 128, 768]> input_79_cast_fp16 = layer_norm(axes = input_79_axes_0, beta = model_bert_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_4_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106370048)))];
tensor<fp16, [3072]> model_bert_encoder_layer_4_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111088704)))];
tensor<fp16, [1, 128, 3072]> linear_28_cast_fp16 = linear(bias = model_bert_encoder_layer_4_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_4_intermediate_dense_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
tensor<string, []> input_83_mode_0 = const()[name = tensor<string, []>("input_83_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_83_cast_fp16 = gelu(mode = input_83_mode_0, x = linear_28_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_4_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111094912)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(115813568)))];
tensor<fp16, [1, 128, 768]> linear_29_cast_fp16 = linear(bias = model_bert_encoder_layer_4_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_4_output_dense_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_87_cast_fp16 = add(x = linear_29_cast_fp16, y = input_79_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
tensor<int32, [1]> hidden_states_31_axes_0 = const()[name = tensor<string, []>("hidden_states_31_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_4_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(115815168)))];
tensor<fp16, [768]> model_bert_encoder_layer_4_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_4_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(115816768)))];
tensor<fp16, [1, 128, 768]> hidden_states_31_cast_fp16 = layer_norm(axes = hidden_states_31_axes_0, beta = model_bert_encoder_layer_4_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_4_output_LayerNorm_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("hidden_states_31_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_5_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(115818368)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(116998080)))];
tensor<fp16, [1, 128, 768]> linear_30_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_self_query_weight_to_fp16, x = hidden_states_31_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
tensor<int32, [4]> var_455 = const()[name = tensor<string, []>("op_455"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_63_cast_fp16 = reshape(shape = var_455, x = linear_30_cast_fp16)[name = tensor<string, []>("x_63_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_5_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(116999680)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118179392)))];
tensor<fp16, [1, 128, 768]> linear_31_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_self_key_weight_to_fp16, x = hidden_states_31_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
tensor<int32, [4]> var_464 = const()[name = tensor<string, []>("op_464"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_67_cast_fp16 = reshape(shape = var_464, x = linear_31_cast_fp16)[name = tensor<string, []>("x_67_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_5_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118180992)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119360704)))];
tensor<fp16, [1, 128, 768]> linear_32_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_self_value_weight_to_fp16, x = hidden_states_31_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
tensor<int32, [4]> var_473 = const()[name = tensor<string, []>("op_473"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_71_cast_fp16 = reshape(shape = var_473, x = linear_32_cast_fp16)[name = tensor<string, []>("x_71_cast_fp16")];
tensor<int32, [4]> var_475 = const()[name = tensor<string, []>("op_475"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_5_y_0_to_fp16 = const()[name = tensor<string, []>("mul_5_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_5_cast_fp16 = mul(x = x_63_cast_fp16, y = mul_5_y_0_to_fp16)[name = tensor<string, []>("mul_5_cast_fp16")];
tensor<bool, []> matmul_5_transpose_y_0 = const()[name = tensor<string, []>("matmul_5_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_5_transpose_x_0 = const()[name = tensor<string, []>("matmul_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_58_perm_0 = const()[name = tensor<string, []>("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_59_perm_0 = const()[name = tensor<string, []>("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_59 = transpose(perm = transpose_59_perm_0, x = x_67_cast_fp16)[name = tensor<string, []>("transpose_97")];
tensor<fp16, [1, 12, 128, 64]> transpose_58 = transpose(perm = transpose_58_perm_0, x = mul_5_cast_fp16)[name = tensor<string, []>("transpose_98")];
tensor<fp16, [1, 12, 128, 128]> matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = transpose_58, y = transpose_59)[name = tensor<string, []>("matmul_5_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_5_cast_fp16 = add(x = matmul_5_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_5_cast_fp16")];
tensor<int32, []> softmax_5_axis_0 = const()[name = tensor<string, []>("softmax_5_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_5_cast_fp16 = softmax(axis = softmax_5_axis_0, x = add_5_cast_fp16)[name = tensor<string, []>("softmax_5_cast_fp16")];
tensor<bool, []> attn_output_21_transpose_x_0 = const()[name = tensor<string, []>("attn_output_21_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_21_transpose_y_0 = const()[name = tensor<string, []>("attn_output_21_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_11_cast_fp16 = transpose(perm = var_475, x = x_71_cast_fp16)[name = tensor<string, []>("transpose_99")];
tensor<fp16, [1, 12, 128, 64]> attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = softmax_5_cast_fp16, y = value_layer_11_cast_fp16)[name = tensor<string, []>("attn_output_21_cast_fp16")];
tensor<int32, [4]> attn_output_23_perm_0 = const()[name = tensor<string, []>("attn_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_479 = const()[name = tensor<string, []>("op_479"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_23_cast_fp16 = transpose(perm = attn_output_23_perm_0, x = attn_output_21_cast_fp16)[name = tensor<string, []>("transpose_96")];
tensor<fp16, [1, 128, 768]> input_89_cast_fp16 = reshape(shape = var_479, x = attn_output_23_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_5_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119362304)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120542016)))];
tensor<fp16, [1, 128, 768]> linear_33_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_output_dense_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_93_cast_fp16 = add(x = linear_33_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
tensor<int32, [1]> input_95_axes_0 = const()[name = tensor<string, []>("input_95_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120543616)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120545216)))];
tensor<fp16, [1, 128, 768]> input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, beta = model_bert_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_5_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120546816)))];
tensor<fp16, [3072]> model_bert_encoder_layer_5_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(125265472)))];
tensor<fp16, [1, 128, 3072]> linear_34_cast_fp16 = linear(bias = model_bert_encoder_layer_5_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_5_intermediate_dense_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
tensor<string, []> input_99_mode_0 = const()[name = tensor<string, []>("input_99_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = linear_34_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_5_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(125271680)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129990336)))];
tensor<fp16, [1, 128, 768]> linear_35_cast_fp16 = linear(bias = model_bert_encoder_layer_5_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_5_output_dense_weight_to_fp16, x = input_99_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_103_cast_fp16 = add(x = linear_35_cast_fp16, y = input_95_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
tensor<int32, [1]> hidden_states_37_axes_0 = const()[name = tensor<string, []>("hidden_states_37_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_5_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129991936)))];
tensor<fp16, [768]> model_bert_encoder_layer_5_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_5_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129993536)))];
tensor<fp16, [1, 128, 768]> hidden_states_37_cast_fp16 = layer_norm(axes = hidden_states_37_axes_0, beta = model_bert_encoder_layer_5_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_5_output_LayerNorm_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("hidden_states_37_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_6_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129995136)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(131174848)))];
tensor<fp16, [1, 128, 768]> linear_36_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_self_query_weight_to_fp16, x = hidden_states_37_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
tensor<int32, [4]> var_523 = const()[name = tensor<string, []>("op_523"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_75_cast_fp16 = reshape(shape = var_523, x = linear_36_cast_fp16)[name = tensor<string, []>("x_75_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_6_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(131176448)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132356160)))];
tensor<fp16, [1, 128, 768]> linear_37_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_self_key_weight_to_fp16, x = hidden_states_37_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
tensor<int32, [4]> var_532 = const()[name = tensor<string, []>("op_532"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_79_cast_fp16 = reshape(shape = var_532, x = linear_37_cast_fp16)[name = tensor<string, []>("x_79_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_6_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132357760)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133537472)))];
tensor<fp16, [1, 128, 768]> linear_38_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_self_value_weight_to_fp16, x = hidden_states_37_cast_fp16)[name = tensor<string, []>("linear_38_cast_fp16")];
tensor<int32, [4]> var_541 = const()[name = tensor<string, []>("op_541"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_83_cast_fp16 = reshape(shape = var_541, x = linear_38_cast_fp16)[name = tensor<string, []>("x_83_cast_fp16")];
tensor<int32, [4]> var_543 = const()[name = tensor<string, []>("op_543"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_6_y_0_to_fp16 = const()[name = tensor<string, []>("mul_6_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_6_cast_fp16 = mul(x = x_75_cast_fp16, y = mul_6_y_0_to_fp16)[name = tensor<string, []>("mul_6_cast_fp16")];
tensor<bool, []> matmul_6_transpose_y_0 = const()[name = tensor<string, []>("matmul_6_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_6_transpose_x_0 = const()[name = tensor<string, []>("matmul_6_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_60_perm_0 = const()[name = tensor<string, []>("transpose_60_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_61_perm_0 = const()[name = tensor<string, []>("transpose_61_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_61 = transpose(perm = transpose_61_perm_0, x = x_79_cast_fp16)[name = tensor<string, []>("transpose_93")];
tensor<fp16, [1, 12, 128, 64]> transpose_60 = transpose(perm = transpose_60_perm_0, x = mul_6_cast_fp16)[name = tensor<string, []>("transpose_94")];
tensor<fp16, [1, 12, 128, 128]> matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = transpose_60, y = transpose_61)[name = tensor<string, []>("matmul_6_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_6_cast_fp16 = add(x = matmul_6_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_6_cast_fp16")];
tensor<int32, []> softmax_6_axis_0 = const()[name = tensor<string, []>("softmax_6_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_6_cast_fp16 = softmax(axis = softmax_6_axis_0, x = add_6_cast_fp16)[name = tensor<string, []>("softmax_6_cast_fp16")];
tensor<bool, []> attn_output_25_transpose_x_0 = const()[name = tensor<string, []>("attn_output_25_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_25_transpose_y_0 = const()[name = tensor<string, []>("attn_output_25_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_13_cast_fp16 = transpose(perm = var_543, x = x_83_cast_fp16)[name = tensor<string, []>("transpose_95")];
tensor<fp16, [1, 12, 128, 64]> attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = softmax_6_cast_fp16, y = value_layer_13_cast_fp16)[name = tensor<string, []>("attn_output_25_cast_fp16")];
tensor<int32, [4]> attn_output_27_perm_0 = const()[name = tensor<string, []>("attn_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_547 = const()[name = tensor<string, []>("op_547"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_27_cast_fp16 = transpose(perm = attn_output_27_perm_0, x = attn_output_25_cast_fp16)[name = tensor<string, []>("transpose_92")];
tensor<fp16, [1, 128, 768]> input_105_cast_fp16 = reshape(shape = var_547, x = attn_output_27_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_6_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133539072)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134718784)))];
tensor<fp16, [1, 128, 768]> linear_39_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_output_dense_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_109_cast_fp16 = add(x = linear_39_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
tensor<int32, [1]> input_111_axes_0 = const()[name = tensor<string, []>("input_111_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_6_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134720384)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134721984)))];
tensor<fp16, [1, 128, 768]> input_111_cast_fp16 = layer_norm(axes = input_111_axes_0, beta = model_bert_encoder_layer_6_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_6_attention_output_LayerNorm_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_6_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134723584)))];
tensor<fp16, [3072]> model_bert_encoder_layer_6_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(139442240)))];
tensor<fp16, [1, 128, 3072]> linear_40_cast_fp16 = linear(bias = model_bert_encoder_layer_6_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_6_intermediate_dense_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")];
tensor<string, []> input_115_mode_0 = const()[name = tensor<string, []>("input_115_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_115_cast_fp16 = gelu(mode = input_115_mode_0, x = linear_40_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_6_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(139448448)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144167104)))];
tensor<fp16, [1, 128, 768]> linear_41_cast_fp16 = linear(bias = model_bert_encoder_layer_6_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_6_output_dense_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_119_cast_fp16 = add(x = linear_41_cast_fp16, y = input_111_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
tensor<int32, [1]> hidden_states_43_axes_0 = const()[name = tensor<string, []>("hidden_states_43_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_6_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144168704)))];
tensor<fp16, [768]> model_bert_encoder_layer_6_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_6_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144170304)))];
tensor<fp16, [1, 128, 768]> hidden_states_43_cast_fp16 = layer_norm(axes = hidden_states_43_axes_0, beta = model_bert_encoder_layer_6_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_6_output_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = tensor<string, []>("hidden_states_43_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_7_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144171904)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145351616)))];
tensor<fp16, [1, 128, 768]> linear_42_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_self_query_weight_to_fp16, x = hidden_states_43_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")];
tensor<int32, [4]> var_591 = const()[name = tensor<string, []>("op_591"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_87_cast_fp16 = reshape(shape = var_591, x = linear_42_cast_fp16)[name = tensor<string, []>("x_87_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_7_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145353216)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(146532928)))];
tensor<fp16, [1, 128, 768]> linear_43_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_self_key_weight_to_fp16, x = hidden_states_43_cast_fp16)[name = tensor<string, []>("linear_43_cast_fp16")];
tensor<int32, [4]> var_600 = const()[name = tensor<string, []>("op_600"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_91_cast_fp16 = reshape(shape = var_600, x = linear_43_cast_fp16)[name = tensor<string, []>("x_91_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_7_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(146534528)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147714240)))];
tensor<fp16, [1, 128, 768]> linear_44_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_self_value_weight_to_fp16, x = hidden_states_43_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")];
tensor<int32, [4]> var_609 = const()[name = tensor<string, []>("op_609"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_95_cast_fp16 = reshape(shape = var_609, x = linear_44_cast_fp16)[name = tensor<string, []>("x_95_cast_fp16")];
tensor<int32, [4]> var_611 = const()[name = tensor<string, []>("op_611"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_7_y_0_to_fp16 = const()[name = tensor<string, []>("mul_7_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_7_cast_fp16 = mul(x = x_87_cast_fp16, y = mul_7_y_0_to_fp16)[name = tensor<string, []>("mul_7_cast_fp16")];
tensor<bool, []> matmul_7_transpose_y_0 = const()[name = tensor<string, []>("matmul_7_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_7_transpose_x_0 = const()[name = tensor<string, []>("matmul_7_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_62_perm_0 = const()[name = tensor<string, []>("transpose_62_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_63_perm_0 = const()[name = tensor<string, []>("transpose_63_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_63 = transpose(perm = transpose_63_perm_0, x = x_91_cast_fp16)[name = tensor<string, []>("transpose_89")];
tensor<fp16, [1, 12, 128, 64]> transpose_62 = transpose(perm = transpose_62_perm_0, x = mul_7_cast_fp16)[name = tensor<string, []>("transpose_90")];
tensor<fp16, [1, 12, 128, 128]> matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = transpose_62, y = transpose_63)[name = tensor<string, []>("matmul_7_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_7_cast_fp16 = add(x = matmul_7_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_7_cast_fp16")];
tensor<int32, []> softmax_7_axis_0 = const()[name = tensor<string, []>("softmax_7_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_7_cast_fp16 = softmax(axis = softmax_7_axis_0, x = add_7_cast_fp16)[name = tensor<string, []>("softmax_7_cast_fp16")];
tensor<bool, []> attn_output_29_transpose_x_0 = const()[name = tensor<string, []>("attn_output_29_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_29_transpose_y_0 = const()[name = tensor<string, []>("attn_output_29_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_15_cast_fp16 = transpose(perm = var_611, x = x_95_cast_fp16)[name = tensor<string, []>("transpose_91")];
tensor<fp16, [1, 12, 128, 64]> attn_output_29_cast_fp16 = matmul(transpose_x = attn_output_29_transpose_x_0, transpose_y = attn_output_29_transpose_y_0, x = softmax_7_cast_fp16, y = value_layer_15_cast_fp16)[name = tensor<string, []>("attn_output_29_cast_fp16")];
tensor<int32, [4]> attn_output_31_perm_0 = const()[name = tensor<string, []>("attn_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_615 = const()[name = tensor<string, []>("op_615"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_31_cast_fp16 = transpose(perm = attn_output_31_perm_0, x = attn_output_29_cast_fp16)[name = tensor<string, []>("transpose_88")];
tensor<fp16, [1, 128, 768]> input_121_cast_fp16 = reshape(shape = var_615, x = attn_output_31_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_7_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147715840)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(148895552)))];
tensor<fp16, [1, 128, 768]> linear_45_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_output_dense_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_125_cast_fp16 = add(x = linear_45_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")];
tensor<int32, [1]> input_127_axes_0 = const()[name = tensor<string, []>("input_127_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_7_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(148897152)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(148898752)))];
tensor<fp16, [1, 128, 768]> input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = model_bert_encoder_layer_7_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_7_attention_output_LayerNorm_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_7_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(148900352)))];
tensor<fp16, [3072]> model_bert_encoder_layer_7_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(153619008)))];
tensor<fp16, [1, 128, 3072]> linear_46_cast_fp16 = linear(bias = model_bert_encoder_layer_7_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_7_intermediate_dense_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")];
tensor<string, []> input_131_mode_0 = const()[name = tensor<string, []>("input_131_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = linear_46_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_7_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(153625216)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158343872)))];
tensor<fp16, [1, 128, 768]> linear_47_cast_fp16 = linear(bias = model_bert_encoder_layer_7_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_7_output_dense_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_135_cast_fp16 = add(x = linear_47_cast_fp16, y = input_127_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
tensor<int32, [1]> hidden_states_49_axes_0 = const()[name = tensor<string, []>("hidden_states_49_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_7_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158345472)))];
tensor<fp16, [768]> model_bert_encoder_layer_7_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_7_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158347072)))];
tensor<fp16, [1, 128, 768]> hidden_states_49_cast_fp16 = layer_norm(axes = hidden_states_49_axes_0, beta = model_bert_encoder_layer_7_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_7_output_LayerNorm_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("hidden_states_49_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_8_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158348672)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(159528384)))];
tensor<fp16, [1, 128, 768]> linear_48_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_self_query_weight_to_fp16, x = hidden_states_49_cast_fp16)[name = tensor<string, []>("linear_48_cast_fp16")];
tensor<int32, [4]> var_659 = const()[name = tensor<string, []>("op_659"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_99_cast_fp16 = reshape(shape = var_659, x = linear_48_cast_fp16)[name = tensor<string, []>("x_99_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_8_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(159529984)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160709696)))];
tensor<fp16, [1, 128, 768]> linear_49_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_self_key_weight_to_fp16, x = hidden_states_49_cast_fp16)[name = tensor<string, []>("linear_49_cast_fp16")];
tensor<int32, [4]> var_668 = const()[name = tensor<string, []>("op_668"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_103_cast_fp16 = reshape(shape = var_668, x = linear_49_cast_fp16)[name = tensor<string, []>("x_103_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_8_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(160711296)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161891008)))];
tensor<fp16, [1, 128, 768]> linear_50_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_self_value_weight_to_fp16, x = hidden_states_49_cast_fp16)[name = tensor<string, []>("linear_50_cast_fp16")];
tensor<int32, [4]> var_677 = const()[name = tensor<string, []>("op_677"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_107_cast_fp16 = reshape(shape = var_677, x = linear_50_cast_fp16)[name = tensor<string, []>("x_107_cast_fp16")];
tensor<int32, [4]> var_679 = const()[name = tensor<string, []>("op_679"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_8_y_0_to_fp16 = const()[name = tensor<string, []>("mul_8_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_8_cast_fp16 = mul(x = x_99_cast_fp16, y = mul_8_y_0_to_fp16)[name = tensor<string, []>("mul_8_cast_fp16")];
tensor<bool, []> matmul_8_transpose_y_0 = const()[name = tensor<string, []>("matmul_8_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_8_transpose_x_0 = const()[name = tensor<string, []>("matmul_8_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_64_perm_0 = const()[name = tensor<string, []>("transpose_64_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_65_perm_0 = const()[name = tensor<string, []>("transpose_65_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_65 = transpose(perm = transpose_65_perm_0, x = x_103_cast_fp16)[name = tensor<string, []>("transpose_85")];
tensor<fp16, [1, 12, 128, 64]> transpose_64 = transpose(perm = transpose_64_perm_0, x = mul_8_cast_fp16)[name = tensor<string, []>("transpose_86")];
tensor<fp16, [1, 12, 128, 128]> matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = transpose_64, y = transpose_65)[name = tensor<string, []>("matmul_8_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_8_cast_fp16 = add(x = matmul_8_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_8_cast_fp16")];
tensor<int32, []> softmax_8_axis_0 = const()[name = tensor<string, []>("softmax_8_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_8_cast_fp16 = softmax(axis = softmax_8_axis_0, x = add_8_cast_fp16)[name = tensor<string, []>("softmax_8_cast_fp16")];
tensor<bool, []> attn_output_33_transpose_x_0 = const()[name = tensor<string, []>("attn_output_33_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_33_transpose_y_0 = const()[name = tensor<string, []>("attn_output_33_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_17_cast_fp16 = transpose(perm = var_679, x = x_107_cast_fp16)[name = tensor<string, []>("transpose_87")];
tensor<fp16, [1, 12, 128, 64]> attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = softmax_8_cast_fp16, y = value_layer_17_cast_fp16)[name = tensor<string, []>("attn_output_33_cast_fp16")];
tensor<int32, [4]> attn_output_35_perm_0 = const()[name = tensor<string, []>("attn_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_683 = const()[name = tensor<string, []>("op_683"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_35_cast_fp16 = transpose(perm = attn_output_35_perm_0, x = attn_output_33_cast_fp16)[name = tensor<string, []>("transpose_84")];
tensor<fp16, [1, 128, 768]> input_137_cast_fp16 = reshape(shape = var_683, x = attn_output_35_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_8_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161892608)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163072320)))];
tensor<fp16, [1, 128, 768]> linear_51_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_output_dense_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("linear_51_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_141_cast_fp16 = add(x = linear_51_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
tensor<int32, [1]> input_143_axes_0 = const()[name = tensor<string, []>("input_143_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_8_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163073920)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163075520)))];
tensor<fp16, [1, 128, 768]> input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = model_bert_encoder_layer_8_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_8_attention_output_LayerNorm_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_8_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163077120)))];
tensor<fp16, [3072]> model_bert_encoder_layer_8_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167795776)))];
tensor<fp16, [1, 128, 3072]> linear_52_cast_fp16 = linear(bias = model_bert_encoder_layer_8_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_8_intermediate_dense_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("linear_52_cast_fp16")];
tensor<string, []> input_147_mode_0 = const()[name = tensor<string, []>("input_147_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_147_cast_fp16 = gelu(mode = input_147_mode_0, x = linear_52_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_8_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167801984)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172520640)))];
tensor<fp16, [1, 128, 768]> linear_53_cast_fp16 = linear(bias = model_bert_encoder_layer_8_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_8_output_dense_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("linear_53_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_151_cast_fp16 = add(x = linear_53_cast_fp16, y = input_143_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
tensor<int32, [1]> hidden_states_55_axes_0 = const()[name = tensor<string, []>("hidden_states_55_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_8_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172522240)))];
tensor<fp16, [768]> model_bert_encoder_layer_8_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_8_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172523840)))];
tensor<fp16, [1, 128, 768]> hidden_states_55_cast_fp16 = layer_norm(axes = hidden_states_55_axes_0, beta = model_bert_encoder_layer_8_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_8_output_LayerNorm_weight_to_fp16, x = input_151_cast_fp16)[name = tensor<string, []>("hidden_states_55_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_9_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172525440)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(173705152)))];
tensor<fp16, [1, 128, 768]> linear_54_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_self_query_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = tensor<string, []>("linear_54_cast_fp16")];
tensor<int32, [4]> var_727 = const()[name = tensor<string, []>("op_727"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_111_cast_fp16 = reshape(shape = var_727, x = linear_54_cast_fp16)[name = tensor<string, []>("x_111_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_9_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(173706752)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174886464)))];
tensor<fp16, [1, 128, 768]> linear_55_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_self_key_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = tensor<string, []>("linear_55_cast_fp16")];
tensor<int32, [4]> var_736 = const()[name = tensor<string, []>("op_736"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_115_cast_fp16 = reshape(shape = var_736, x = linear_55_cast_fp16)[name = tensor<string, []>("x_115_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_9_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174888064)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176067776)))];
tensor<fp16, [1, 128, 768]> linear_56_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_self_value_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = tensor<string, []>("linear_56_cast_fp16")];
tensor<int32, [4]> var_745 = const()[name = tensor<string, []>("op_745"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_119_cast_fp16 = reshape(shape = var_745, x = linear_56_cast_fp16)[name = tensor<string, []>("x_119_cast_fp16")];
tensor<int32, [4]> var_747 = const()[name = tensor<string, []>("op_747"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_9_y_0_to_fp16 = const()[name = tensor<string, []>("mul_9_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_9_cast_fp16 = mul(x = x_111_cast_fp16, y = mul_9_y_0_to_fp16)[name = tensor<string, []>("mul_9_cast_fp16")];
tensor<bool, []> matmul_9_transpose_y_0 = const()[name = tensor<string, []>("matmul_9_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_9_transpose_x_0 = const()[name = tensor<string, []>("matmul_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_66_perm_0 = const()[name = tensor<string, []>("transpose_66_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_67_perm_0 = const()[name = tensor<string, []>("transpose_67_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_67 = transpose(perm = transpose_67_perm_0, x = x_115_cast_fp16)[name = tensor<string, []>("transpose_81")];
tensor<fp16, [1, 12, 128, 64]> transpose_66 = transpose(perm = transpose_66_perm_0, x = mul_9_cast_fp16)[name = tensor<string, []>("transpose_82")];
tensor<fp16, [1, 12, 128, 128]> matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = transpose_66, y = transpose_67)[name = tensor<string, []>("matmul_9_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_9_cast_fp16 = add(x = matmul_9_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_9_cast_fp16")];
tensor<int32, []> softmax_9_axis_0 = const()[name = tensor<string, []>("softmax_9_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_9_cast_fp16 = softmax(axis = softmax_9_axis_0, x = add_9_cast_fp16)[name = tensor<string, []>("softmax_9_cast_fp16")];
tensor<bool, []> attn_output_37_transpose_x_0 = const()[name = tensor<string, []>("attn_output_37_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_37_transpose_y_0 = const()[name = tensor<string, []>("attn_output_37_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_19_cast_fp16 = transpose(perm = var_747, x = x_119_cast_fp16)[name = tensor<string, []>("transpose_83")];
tensor<fp16, [1, 12, 128, 64]> attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = softmax_9_cast_fp16, y = value_layer_19_cast_fp16)[name = tensor<string, []>("attn_output_37_cast_fp16")];
tensor<int32, [4]> attn_output_39_perm_0 = const()[name = tensor<string, []>("attn_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_751 = const()[name = tensor<string, []>("op_751"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_39_cast_fp16 = transpose(perm = attn_output_39_perm_0, x = attn_output_37_cast_fp16)[name = tensor<string, []>("transpose_80")];
tensor<fp16, [1, 128, 768]> input_153_cast_fp16 = reshape(shape = var_751, x = attn_output_39_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_9_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176069376)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177249088)))];
tensor<fp16, [1, 128, 768]> linear_57_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_output_dense_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("linear_57_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_157_cast_fp16 = add(x = linear_57_cast_fp16, y = hidden_states_55_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
tensor<int32, [1]> input_159_axes_0 = const()[name = tensor<string, []>("input_159_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_9_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177250688)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177252288)))];
tensor<fp16, [1, 128, 768]> input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, beta = model_bert_encoder_layer_9_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_9_attention_output_LayerNorm_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_9_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177253888)))];
tensor<fp16, [3072]> model_bert_encoder_layer_9_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181972544)))];
tensor<fp16, [1, 128, 3072]> linear_58_cast_fp16 = linear(bias = model_bert_encoder_layer_9_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_9_intermediate_dense_weight_to_fp16, x = input_159_cast_fp16)[name = tensor<string, []>("linear_58_cast_fp16")];
tensor<string, []> input_163_mode_0 = const()[name = tensor<string, []>("input_163_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_163_cast_fp16 = gelu(mode = input_163_mode_0, x = linear_58_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_9_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181978752)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186697408)))];
tensor<fp16, [1, 128, 768]> linear_59_cast_fp16 = linear(bias = model_bert_encoder_layer_9_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_9_output_dense_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("linear_59_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_167_cast_fp16 = add(x = linear_59_cast_fp16, y = input_159_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
tensor<int32, [1]> hidden_states_61_axes_0 = const()[name = tensor<string, []>("hidden_states_61_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_9_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186699008)))];
tensor<fp16, [768]> model_bert_encoder_layer_9_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_9_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186700608)))];
tensor<fp16, [1, 128, 768]> hidden_states_61_cast_fp16 = layer_norm(axes = hidden_states_61_axes_0, beta = model_bert_encoder_layer_9_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_9_output_LayerNorm_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("hidden_states_61_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_10_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186702208)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187881920)))];
tensor<fp16, [1, 128, 768]> linear_60_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_self_query_weight_to_fp16, x = hidden_states_61_cast_fp16)[name = tensor<string, []>("linear_60_cast_fp16")];
tensor<int32, [4]> var_795 = const()[name = tensor<string, []>("op_795"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_123_cast_fp16 = reshape(shape = var_795, x = linear_60_cast_fp16)[name = tensor<string, []>("x_123_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_10_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187883520)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189063232)))];
tensor<fp16, [1, 128, 768]> linear_61_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_self_key_weight_to_fp16, x = hidden_states_61_cast_fp16)[name = tensor<string, []>("linear_61_cast_fp16")];
tensor<int32, [4]> var_804 = const()[name = tensor<string, []>("op_804"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_127_cast_fp16 = reshape(shape = var_804, x = linear_61_cast_fp16)[name = tensor<string, []>("x_127_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_10_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189064832)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(190244544)))];
tensor<fp16, [1, 128, 768]> linear_62_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_self_value_weight_to_fp16, x = hidden_states_61_cast_fp16)[name = tensor<string, []>("linear_62_cast_fp16")];
tensor<int32, [4]> var_813 = const()[name = tensor<string, []>("op_813"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_131_cast_fp16 = reshape(shape = var_813, x = linear_62_cast_fp16)[name = tensor<string, []>("x_131_cast_fp16")];
tensor<int32, [4]> var_815 = const()[name = tensor<string, []>("op_815"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_10_y_0_to_fp16 = const()[name = tensor<string, []>("mul_10_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_10_cast_fp16 = mul(x = x_123_cast_fp16, y = mul_10_y_0_to_fp16)[name = tensor<string, []>("mul_10_cast_fp16")];
tensor<bool, []> matmul_10_transpose_y_0 = const()[name = tensor<string, []>("matmul_10_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_10_transpose_x_0 = const()[name = tensor<string, []>("matmul_10_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_68_perm_0 = const()[name = tensor<string, []>("transpose_68_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_69_perm_0 = const()[name = tensor<string, []>("transpose_69_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_69 = transpose(perm = transpose_69_perm_0, x = x_127_cast_fp16)[name = tensor<string, []>("transpose_77")];
tensor<fp16, [1, 12, 128, 64]> transpose_68 = transpose(perm = transpose_68_perm_0, x = mul_10_cast_fp16)[name = tensor<string, []>("transpose_78")];
tensor<fp16, [1, 12, 128, 128]> matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = transpose_68, y = transpose_69)[name = tensor<string, []>("matmul_10_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_10_cast_fp16 = add(x = matmul_10_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_10_cast_fp16")];
tensor<int32, []> softmax_10_axis_0 = const()[name = tensor<string, []>("softmax_10_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_10_cast_fp16 = softmax(axis = softmax_10_axis_0, x = add_10_cast_fp16)[name = tensor<string, []>("softmax_10_cast_fp16")];
tensor<bool, []> attn_output_41_transpose_x_0 = const()[name = tensor<string, []>("attn_output_41_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_41_transpose_y_0 = const()[name = tensor<string, []>("attn_output_41_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_21_cast_fp16 = transpose(perm = var_815, x = x_131_cast_fp16)[name = tensor<string, []>("transpose_79")];
tensor<fp16, [1, 12, 128, 64]> attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = softmax_10_cast_fp16, y = value_layer_21_cast_fp16)[name = tensor<string, []>("attn_output_41_cast_fp16")];
tensor<int32, [4]> attn_output_43_perm_0 = const()[name = tensor<string, []>("attn_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_819 = const()[name = tensor<string, []>("op_819"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_43_cast_fp16 = transpose(perm = attn_output_43_perm_0, x = attn_output_41_cast_fp16)[name = tensor<string, []>("transpose_76")];
tensor<fp16, [1, 128, 768]> input_169_cast_fp16 = reshape(shape = var_819, x = attn_output_43_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_10_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(190246144)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(191425856)))];
tensor<fp16, [1, 128, 768]> linear_63_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_output_dense_weight_to_fp16, x = input_169_cast_fp16)[name = tensor<string, []>("linear_63_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_173_cast_fp16 = add(x = linear_63_cast_fp16, y = hidden_states_61_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
tensor<int32, [1]> input_175_axes_0 = const()[name = tensor<string, []>("input_175_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_10_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(191427456)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(191429056)))];
tensor<fp16, [1, 128, 768]> input_175_cast_fp16 = layer_norm(axes = input_175_axes_0, beta = model_bert_encoder_layer_10_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_10_attention_output_LayerNorm_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("input_175_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_10_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(191430656)))];
tensor<fp16, [3072]> model_bert_encoder_layer_10_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196149312)))];
tensor<fp16, [1, 128, 3072]> linear_64_cast_fp16 = linear(bias = model_bert_encoder_layer_10_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_10_intermediate_dense_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("linear_64_cast_fp16")];
tensor<string, []> input_179_mode_0 = const()[name = tensor<string, []>("input_179_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_179_cast_fp16 = gelu(mode = input_179_mode_0, x = linear_64_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_10_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196155520)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200874176)))];
tensor<fp16, [1, 128, 768]> linear_65_cast_fp16 = linear(bias = model_bert_encoder_layer_10_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_10_output_dense_weight_to_fp16, x = input_179_cast_fp16)[name = tensor<string, []>("linear_65_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_183_cast_fp16 = add(x = linear_65_cast_fp16, y = input_175_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
tensor<int32, [1]> hidden_states_67_axes_0 = const()[name = tensor<string, []>("hidden_states_67_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_10_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200875776)))];
tensor<fp16, [768]> model_bert_encoder_layer_10_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_10_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200877376)))];
tensor<fp16, [1, 128, 768]> hidden_states_67_cast_fp16 = layer_norm(axes = hidden_states_67_axes_0, beta = model_bert_encoder_layer_10_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_10_output_LayerNorm_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("hidden_states_67_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_11_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_self_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200878976)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202058688)))];
tensor<fp16, [1, 128, 768]> linear_66_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_self_query_weight_to_fp16, x = hidden_states_67_cast_fp16)[name = tensor<string, []>("linear_66_cast_fp16")];
tensor<int32, [4]> var_863 = const()[name = tensor<string, []>("op_863"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_135_cast_fp16 = reshape(shape = var_863, x = linear_66_cast_fp16)[name = tensor<string, []>("x_135_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_11_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_self_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202060288)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(203240000)))];
tensor<fp16, [1, 128, 768]> linear_67_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_self_key_weight_to_fp16, x = hidden_states_67_cast_fp16)[name = tensor<string, []>("linear_67_cast_fp16")];
tensor<int32, [4]> var_872 = const()[name = tensor<string, []>("op_872"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_139_cast_fp16 = reshape(shape = var_872, x = linear_67_cast_fp16)[name = tensor<string, []>("x_139_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_11_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_self_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(203241600)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(204421312)))];
tensor<fp16, [1, 128, 768]> linear_68_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_self_value_weight_to_fp16, x = hidden_states_67_cast_fp16)[name = tensor<string, []>("linear_68_cast_fp16")];
tensor<int32, [4]> var_881 = const()[name = tensor<string, []>("op_881"), val = tensor<int32, [4]>([1, 128, 12, 64])];
tensor<fp16, [1, 128, 12, 64]> x_cast_fp16 = reshape(shape = var_881, x = linear_68_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
tensor<int32, [4]> var_883 = const()[name = tensor<string, []>("op_883"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, []> mul_11_y_0_to_fp16 = const()[name = tensor<string, []>("mul_11_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 128, 12, 64]> mul_11_cast_fp16 = mul(x = x_135_cast_fp16, y = mul_11_y_0_to_fp16)[name = tensor<string, []>("mul_11_cast_fp16")];
tensor<bool, []> matmul_11_transpose_y_0 = const()[name = tensor<string, []>("matmul_11_transpose_y_0"), val = tensor<bool, []>(true)];
tensor<bool, []> matmul_11_transpose_x_0 = const()[name = tensor<string, []>("matmul_11_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_70_perm_0 = const()[name = tensor<string, []>("transpose_70_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_71_perm_0 = const()[name = tensor<string, []>("transpose_71_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<fp16, [1, 12, 128, 64]> transpose_71 = transpose(perm = transpose_71_perm_0, x = x_139_cast_fp16)[name = tensor<string, []>("transpose_73")];
tensor<fp16, [1, 12, 128, 64]> transpose_70 = transpose(perm = transpose_70_perm_0, x = mul_11_cast_fp16)[name = tensor<string, []>("transpose_74")];
tensor<fp16, [1, 12, 128, 128]> matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = transpose_70, y = transpose_71)[name = tensor<string, []>("matmul_11_cast_fp16")];
tensor<fp16, [1, 12, 128, 128]> add_11_cast_fp16 = add(x = matmul_11_cast_fp16, y = attention_mask_cast_fp16)[name = tensor<string, []>("add_11_cast_fp16")];
tensor<int32, []> softmax_11_axis_0 = const()[name = tensor<string, []>("softmax_11_axis_0"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 12, 128, 128]> softmax_11_cast_fp16 = softmax(axis = softmax_11_axis_0, x = add_11_cast_fp16)[name = tensor<string, []>("softmax_11_cast_fp16")];
tensor<bool, []> attn_output_45_transpose_x_0 = const()[name = tensor<string, []>("attn_output_45_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> attn_output_45_transpose_y_0 = const()[name = tensor<string, []>("attn_output_45_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 128, 64]> value_layer_cast_fp16 = transpose(perm = var_883, x = x_cast_fp16)[name = tensor<string, []>("transpose_75")];
tensor<fp16, [1, 12, 128, 64]> attn_output_45_cast_fp16 = matmul(transpose_x = attn_output_45_transpose_x_0, transpose_y = attn_output_45_transpose_y_0, x = softmax_11_cast_fp16, y = value_layer_cast_fp16)[name = tensor<string, []>("attn_output_45_cast_fp16")];
tensor<int32, [4]> attn_output_perm_0 = const()[name = tensor<string, []>("attn_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_887 = const()[name = tensor<string, []>("op_887"), val = tensor<int32, [3]>([1, 128, 768])];
tensor<fp16, [1, 128, 12, 64]> attn_output_cast_fp16 = transpose(perm = attn_output_perm_0, x = attn_output_45_cast_fp16)[name = tensor<string, []>("transpose_72")];
tensor<fp16, [1, 128, 768]> input_185_cast_fp16 = reshape(shape = var_887, x = attn_output_cast_fp16)[name = tensor<string, []>("input_185_cast_fp16")];
tensor<fp16, [768, 768]> model_bert_encoder_layer_11_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(204422912)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(205602624)))];
tensor<fp16, [1, 128, 768]> linear_69_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_output_dense_weight_to_fp16, x = input_185_cast_fp16)[name = tensor<string, []>("linear_69_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_189_cast_fp16 = add(x = linear_69_cast_fp16, y = hidden_states_67_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
tensor<int32, [1]> input_191_axes_0 = const()[name = tensor<string, []>("input_191_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_11_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(205604224)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(205605824)))];
tensor<fp16, [1, 128, 768]> input_191_cast_fp16 = layer_norm(axes = input_191_axes_0, beta = model_bert_encoder_layer_11_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_11_attention_output_LayerNorm_weight_to_fp16, x = input_189_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
tensor<fp16, [3072, 768]> model_bert_encoder_layer_11_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(205607424)))];
tensor<fp16, [3072]> model_bert_encoder_layer_11_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(210326080)))];
tensor<fp16, [1, 128, 3072]> linear_70_cast_fp16 = linear(bias = model_bert_encoder_layer_11_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_11_intermediate_dense_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("linear_70_cast_fp16")];
tensor<string, []> input_195_mode_0 = const()[name = tensor<string, []>("input_195_mode_0"), val = tensor<string, []>("EXACT")];
tensor<fp16, [1, 128, 3072]> input_195_cast_fp16 = gelu(mode = input_195_mode_0, x = linear_70_cast_fp16)[name = tensor<string, []>("input_195_cast_fp16")];
tensor<fp16, [768, 3072]> model_bert_encoder_layer_11_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_output_dense_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(210332288)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215050944)))];
tensor<fp16, [1, 128, 768]> linear_71_cast_fp16 = linear(bias = model_bert_encoder_layer_11_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_11_output_dense_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("linear_71_cast_fp16")];
tensor<fp16, [1, 128, 768]> input_199_cast_fp16 = add(x = linear_71_cast_fp16, y = input_191_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
tensor<int32, [1]> input_201_axes_0 = const()[name = tensor<string, []>("input_201_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> model_bert_encoder_layer_11_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215052544)))];
tensor<fp16, [768]> model_bert_encoder_layer_11_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_bert_encoder_layer_11_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215054144)))];
tensor<fp16, [1, 128, 768]> input_201_cast_fp16 = layer_norm(axes = input_201_axes_0, beta = model_bert_encoder_layer_11_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_11_output_LayerNorm_weight_to_fp16, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
tensor<fp16, [47, 768]> model_classifier_weight_to_fp16 = const()[name = tensor<string, []>("model_classifier_weight_to_fp16"), val = tensor<fp16, [47, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215055744)))];
tensor<fp16, [47]> model_classifier_bias_to_fp16 = const()[name = tensor<string, []>("model_classifier_bias_to_fp16"), val = tensor<fp16, [47]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215128000)))];
tensor<fp16, [1, 128, 47]> linear_72_cast_fp16 = linear(bias = model_classifier_bias_to_fp16, weight = model_classifier_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("linear_72_cast_fp16")];
tensor<int32, []> var_920 = const()[name = tensor<string, []>("op_920"), val = tensor<int32, []>(-1)];
tensor<fp16, [1, 128, 47]> var_922_cast_fp16 = softmax(axis = var_920, x = linear_72_cast_fp16)[name = tensor<string, []>("op_922_cast_fp16")];
tensor<string, []> var_922_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_922_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
tensor<fp32, [1, 128, 47]> token_scores = cast(dtype = var_922_cast_fp16_to_fp32_dtype_0, x = var_922_cast_fp16)[name = tensor<string, []>("cast_53")];
} -> (token_scores);
}