program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.7.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})] { func main(tensor attention_mask, tensor input_ids) { tensor inputs_embeds_axis_0 = const()[name = tensor("inputs_embeds_axis_0"), val = tensor(0)]; tensor model_bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = tensor("model_bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, indices = input_ids, x = model_bert_embeddings_word_embeddings_weight_to_fp16)[name = tensor("inputs_embeds_cast_fp16")]; tensor token_type_embeddings_1_to_fp16 = const()[name = tensor("token_type_embeddings_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44537984)))]; tensor embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = token_type_embeddings_1_to_fp16)[name = tensor("embeddings_1_cast_fp16")]; tensor position_embeddings_1_to_fp16 = const()[name = tensor("position_embeddings_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44734656)))]; tensor input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = tensor("input_5_cast_fp16")]; tensor input_7_axes_0 = const()[name = tensor("input_7_axes_0"), val = tensor([-1])]; tensor model_bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44931328)))]; tensor model_bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44932928)))]; tensor var_10_to_fp16 = const()[name = tensor("op_10_to_fp16"), val = tensor(0x1p-24)]; tensor input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = model_bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor var_65_axes_0 = const()[name = tensor("op_65_axes_0"), val = tensor([1])]; tensor var_65 = expand_dims(axes = var_65_axes_0, x = attention_mask)[name = tensor("op_65")]; tensor var_66_axes_0 = const()[name = tensor("op_66_axes_0"), val = tensor([2])]; tensor var_66 = expand_dims(axes = var_66_axes_0, x = var_65)[name = tensor("op_66")]; tensor var_69_reps_0 = const()[name = tensor("op_69_reps_0"), val = tensor([1, 1, 128, 1])]; tensor var_69 = tile(reps = var_69_reps_0, x = var_66)[name = tensor("op_69")]; tensor const_5_to_fp16 = const()[name = tensor("const_5_to_fp16"), val = tensor(0x1p+0)]; tensor cast_3_to_fp16_dtype_0 = const()[name = tensor("cast_3_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_69_to_fp16 = cast(dtype = cast_3_to_fp16_dtype_0, x = var_69)[name = tensor("cast_55")]; tensor inverted_mask_cast_fp16 = sub(x = const_5_to_fp16, y = var_69_to_fp16)[name = tensor("inverted_mask_cast_fp16")]; tensor cast_4_dtype_0 = const()[name = tensor("cast_4_dtype_0"), val = tensor("bool")]; tensor var_20_to_fp16 = const()[name = tensor("op_20_to_fp16"), val = tensor(-inf)]; tensor inverted_mask_cast_fp16_to_bool = cast(dtype = cast_4_dtype_0, x = inverted_mask_cast_fp16)[name = tensor("cast_54")]; tensor attention_mask_cast_fp16 = select(a = var_20_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = tensor("attention_mask_cast_fp16")]; tensor model_bert_encoder_layer_0_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44934528)))]; tensor model_bert_encoder_layer_0_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46114240)))]; tensor linear_0_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_self_query_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("linear_0_cast_fp16")]; tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 128, 12, 64])]; tensor x_3_cast_fp16 = reshape(shape = var_115, x = linear_0_cast_fp16)[name = tensor("x_3_cast_fp16")]; tensor model_bert_encoder_layer_0_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46115840)))]; tensor model_bert_encoder_layer_0_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47295552)))]; tensor linear_1_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_self_key_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor var_124 = const()[name = tensor("op_124"), val = tensor([1, 128, 12, 64])]; tensor x_7_cast_fp16 = reshape(shape = var_124, x = linear_1_cast_fp16)[name = tensor("x_7_cast_fp16")]; tensor model_bert_encoder_layer_0_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47297152)))]; tensor model_bert_encoder_layer_0_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48476864)))]; tensor linear_2_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_self_value_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_133 = const()[name = tensor("op_133"), val = tensor([1, 128, 12, 64])]; tensor x_11_cast_fp16 = reshape(shape = var_133, x = linear_2_cast_fp16)[name = tensor("x_11_cast_fp16")]; tensor var_135 = const()[name = tensor("op_135"), val = tensor([0, 2, 1, 3])]; tensor mul_0_y_0_to_fp16 = const()[name = tensor("mul_0_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_0_cast_fp16 = mul(x = x_3_cast_fp16, y = mul_0_y_0_to_fp16)[name = tensor("mul_0_cast_fp16")]; tensor matmul_0_transpose_y_0 = const()[name = tensor("matmul_0_transpose_y_0"), val = tensor(true)]; tensor matmul_0_transpose_x_0 = const()[name = tensor("matmul_0_transpose_x_0"), val = tensor(false)]; tensor transpose_48_perm_0 = const()[name = tensor("transpose_48_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_49_perm_0 = const()[name = tensor("transpose_49_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_49 = transpose(perm = transpose_49_perm_0, x = x_7_cast_fp16)[name = tensor("transpose_117")]; tensor transpose_48 = transpose(perm = transpose_48_perm_0, x = mul_0_cast_fp16)[name = tensor("transpose_118")]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = transpose_48, y = transpose_49)[name = tensor("matmul_0_cast_fp16")]; tensor add_0_cast_fp16 = add(x = matmul_0_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_0_cast_fp16")]; tensor softmax_0_axis_0 = const()[name = tensor("softmax_0_axis_0"), val = tensor(-1)]; tensor softmax_0_cast_fp16 = softmax(axis = softmax_0_axis_0, x = add_0_cast_fp16)[name = tensor("softmax_0_cast_fp16")]; tensor attn_output_1_transpose_x_0 = const()[name = tensor("attn_output_1_transpose_x_0"), val = tensor(false)]; tensor attn_output_1_transpose_y_0 = const()[name = tensor("attn_output_1_transpose_y_0"), val = tensor(false)]; tensor value_layer_1_cast_fp16 = transpose(perm = var_135, x = x_11_cast_fp16)[name = tensor("transpose_119")]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = softmax_0_cast_fp16, y = value_layer_1_cast_fp16)[name = tensor("attn_output_1_cast_fp16")]; tensor attn_output_3_perm_0 = const()[name = tensor("attn_output_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_139 = const()[name = tensor("op_139"), val = tensor([1, 128, 768])]; tensor attn_output_3_cast_fp16 = transpose(perm = attn_output_3_perm_0, x = attn_output_1_cast_fp16)[name = tensor("transpose_116")]; tensor input_9_cast_fp16 = reshape(shape = var_139, x = attn_output_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor model_bert_encoder_layer_0_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48478464)))]; tensor model_bert_encoder_layer_0_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49658176)))]; tensor linear_3_cast_fp16 = linear(bias = model_bert_encoder_layer_0_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_0_attention_output_dense_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_axes_0 = const()[name = tensor("input_15_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49659776)))]; tensor model_bert_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49661376)))]; tensor input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = model_bert_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor model_bert_encoder_layer_0_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49662976)))]; tensor model_bert_encoder_layer_0_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54381632)))]; tensor linear_4_cast_fp16 = linear(bias = model_bert_encoder_layer_0_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_0_intermediate_dense_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor model_bert_encoder_layer_0_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54387840)))]; tensor model_bert_encoder_layer_0_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59106496)))]; tensor linear_5_cast_fp16 = linear(bias = model_bert_encoder_layer_0_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_0_output_dense_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("linear_5_cast_fp16")]; tensor input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor hidden_states_7_axes_0 = const()[name = tensor("hidden_states_7_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_0_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59108096)))]; tensor model_bert_encoder_layer_0_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_0_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59109696)))]; tensor hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = model_bert_encoder_layer_0_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_0_output_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; tensor model_bert_encoder_layer_1_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59111296)))]; tensor model_bert_encoder_layer_1_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60291008)))]; tensor linear_6_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_self_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = tensor("linear_6_cast_fp16")]; tensor var_183 = const()[name = tensor("op_183"), val = tensor([1, 128, 12, 64])]; tensor x_15_cast_fp16 = reshape(shape = var_183, x = linear_6_cast_fp16)[name = tensor("x_15_cast_fp16")]; tensor model_bert_encoder_layer_1_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60292608)))]; tensor model_bert_encoder_layer_1_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61472320)))]; tensor linear_7_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_self_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor var_192 = const()[name = tensor("op_192"), val = tensor([1, 128, 12, 64])]; tensor x_19_cast_fp16 = reshape(shape = var_192, x = linear_7_cast_fp16)[name = tensor("x_19_cast_fp16")]; tensor model_bert_encoder_layer_1_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61473920)))]; tensor model_bert_encoder_layer_1_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62653632)))]; tensor linear_8_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_self_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor var_201 = const()[name = tensor("op_201"), val = tensor([1, 128, 12, 64])]; tensor x_23_cast_fp16 = reshape(shape = var_201, x = linear_8_cast_fp16)[name = tensor("x_23_cast_fp16")]; tensor var_203 = const()[name = tensor("op_203"), val = tensor([0, 2, 1, 3])]; tensor mul_1_y_0_to_fp16 = const()[name = tensor("mul_1_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_1_cast_fp16 = mul(x = x_15_cast_fp16, y = mul_1_y_0_to_fp16)[name = tensor("mul_1_cast_fp16")]; tensor matmul_1_transpose_y_0 = const()[name = tensor("matmul_1_transpose_y_0"), val = tensor(true)]; tensor matmul_1_transpose_x_0 = const()[name = tensor("matmul_1_transpose_x_0"), val = tensor(false)]; tensor transpose_50_perm_0 = const()[name = tensor("transpose_50_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_51_perm_0 = const()[name = tensor("transpose_51_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_51 = transpose(perm = transpose_51_perm_0, x = x_19_cast_fp16)[name = tensor("transpose_113")]; tensor transpose_50 = transpose(perm = transpose_50_perm_0, x = mul_1_cast_fp16)[name = tensor("transpose_114")]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = transpose_50, y = transpose_51)[name = tensor("matmul_1_cast_fp16")]; tensor add_1_cast_fp16 = add(x = matmul_1_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_1_cast_fp16")]; tensor softmax_1_axis_0 = const()[name = tensor("softmax_1_axis_0"), val = tensor(-1)]; tensor softmax_1_cast_fp16 = softmax(axis = softmax_1_axis_0, x = add_1_cast_fp16)[name = tensor("softmax_1_cast_fp16")]; tensor attn_output_5_transpose_x_0 = const()[name = tensor("attn_output_5_transpose_x_0"), val = tensor(false)]; tensor attn_output_5_transpose_y_0 = const()[name = tensor("attn_output_5_transpose_y_0"), val = tensor(false)]; tensor value_layer_3_cast_fp16 = transpose(perm = var_203, x = x_23_cast_fp16)[name = tensor("transpose_115")]; tensor attn_output_5_cast_fp16 = matmul(transpose_x = attn_output_5_transpose_x_0, transpose_y = attn_output_5_transpose_y_0, x = softmax_1_cast_fp16, y = value_layer_3_cast_fp16)[name = tensor("attn_output_5_cast_fp16")]; tensor attn_output_7_perm_0 = const()[name = tensor("attn_output_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_207 = const()[name = tensor("op_207"), val = tensor([1, 128, 768])]; tensor attn_output_7_cast_fp16 = transpose(perm = attn_output_7_perm_0, x = attn_output_5_cast_fp16)[name = tensor("transpose_112")]; tensor input_25_cast_fp16 = reshape(shape = var_207, x = attn_output_7_cast_fp16)[name = tensor("input_25_cast_fp16")]; tensor model_bert_encoder_layer_1_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62655232)))]; tensor model_bert_encoder_layer_1_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63834944)))]; tensor linear_9_cast_fp16 = linear(bias = model_bert_encoder_layer_1_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_1_attention_output_dense_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor input_29_cast_fp16 = add(x = linear_9_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor input_31_axes_0 = const()[name = tensor("input_31_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63836544)))]; tensor model_bert_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63838144)))]; tensor input_31_cast_fp16 = layer_norm(axes = input_31_axes_0, beta = model_bert_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor model_bert_encoder_layer_1_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63839744)))]; tensor model_bert_encoder_layer_1_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68558400)))]; tensor linear_10_cast_fp16 = linear(bias = model_bert_encoder_layer_1_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_1_intermediate_dense_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("linear_10_cast_fp16")]; tensor input_35_mode_0 = const()[name = tensor("input_35_mode_0"), val = tensor("EXACT")]; tensor input_35_cast_fp16 = gelu(mode = input_35_mode_0, x = linear_10_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor model_bert_encoder_layer_1_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68564608)))]; tensor model_bert_encoder_layer_1_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73283264)))]; tensor linear_11_cast_fp16 = linear(bias = model_bert_encoder_layer_1_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_1_output_dense_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor input_39_cast_fp16 = add(x = linear_11_cast_fp16, y = input_31_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor hidden_states_13_axes_0 = const()[name = tensor("hidden_states_13_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_1_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73284864)))]; tensor model_bert_encoder_layer_1_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_1_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73286464)))]; tensor hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = model_bert_encoder_layer_1_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_1_output_LayerNorm_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; tensor model_bert_encoder_layer_2_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73288064)))]; tensor model_bert_encoder_layer_2_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74467776)))]; tensor linear_12_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_self_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_251 = const()[name = tensor("op_251"), val = tensor([1, 128, 12, 64])]; tensor x_27_cast_fp16 = reshape(shape = var_251, x = linear_12_cast_fp16)[name = tensor("x_27_cast_fp16")]; tensor model_bert_encoder_layer_2_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74469376)))]; tensor model_bert_encoder_layer_2_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75649088)))]; tensor linear_13_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_self_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor var_260 = const()[name = tensor("op_260"), val = tensor([1, 128, 12, 64])]; tensor x_31_cast_fp16 = reshape(shape = var_260, x = linear_13_cast_fp16)[name = tensor("x_31_cast_fp16")]; tensor model_bert_encoder_layer_2_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75650688)))]; tensor model_bert_encoder_layer_2_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76830400)))]; tensor linear_14_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_self_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor var_269 = const()[name = tensor("op_269"), val = tensor([1, 128, 12, 64])]; tensor x_35_cast_fp16 = reshape(shape = var_269, x = linear_14_cast_fp16)[name = tensor("x_35_cast_fp16")]; tensor var_271 = const()[name = tensor("op_271"), val = tensor([0, 2, 1, 3])]; tensor mul_2_y_0_to_fp16 = const()[name = tensor("mul_2_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_2_cast_fp16 = mul(x = x_27_cast_fp16, y = mul_2_y_0_to_fp16)[name = tensor("mul_2_cast_fp16")]; tensor matmul_2_transpose_y_0 = const()[name = tensor("matmul_2_transpose_y_0"), val = tensor(true)]; tensor matmul_2_transpose_x_0 = const()[name = tensor("matmul_2_transpose_x_0"), val = tensor(false)]; tensor transpose_52_perm_0 = const()[name = tensor("transpose_52_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_53_perm_0 = const()[name = tensor("transpose_53_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_53 = transpose(perm = transpose_53_perm_0, x = x_31_cast_fp16)[name = tensor("transpose_109")]; tensor transpose_52 = transpose(perm = transpose_52_perm_0, x = mul_2_cast_fp16)[name = tensor("transpose_110")]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = transpose_52, y = transpose_53)[name = tensor("matmul_2_cast_fp16")]; tensor add_2_cast_fp16 = add(x = matmul_2_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_2_cast_fp16")]; tensor softmax_2_axis_0 = const()[name = tensor("softmax_2_axis_0"), val = tensor(-1)]; tensor softmax_2_cast_fp16 = softmax(axis = softmax_2_axis_0, x = add_2_cast_fp16)[name = tensor("softmax_2_cast_fp16")]; tensor attn_output_9_transpose_x_0 = const()[name = tensor("attn_output_9_transpose_x_0"), val = tensor(false)]; tensor attn_output_9_transpose_y_0 = const()[name = tensor("attn_output_9_transpose_y_0"), val = tensor(false)]; tensor value_layer_5_cast_fp16 = transpose(perm = var_271, x = x_35_cast_fp16)[name = tensor("transpose_111")]; tensor attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = softmax_2_cast_fp16, y = value_layer_5_cast_fp16)[name = tensor("attn_output_9_cast_fp16")]; tensor attn_output_11_perm_0 = const()[name = tensor("attn_output_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_275 = const()[name = tensor("op_275"), val = tensor([1, 128, 768])]; tensor attn_output_11_cast_fp16 = transpose(perm = attn_output_11_perm_0, x = attn_output_9_cast_fp16)[name = tensor("transpose_108")]; tensor input_41_cast_fp16 = reshape(shape = var_275, x = attn_output_11_cast_fp16)[name = tensor("input_41_cast_fp16")]; tensor model_bert_encoder_layer_2_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76832000)))]; tensor model_bert_encoder_layer_2_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78011712)))]; tensor linear_15_cast_fp16 = linear(bias = model_bert_encoder_layer_2_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_2_attention_output_dense_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("linear_15_cast_fp16")]; tensor input_45_cast_fp16 = add(x = linear_15_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor input_47_axes_0 = const()[name = tensor("input_47_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78013312)))]; tensor model_bert_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78014912)))]; tensor input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = model_bert_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor model_bert_encoder_layer_2_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78016512)))]; tensor model_bert_encoder_layer_2_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82735168)))]; tensor linear_16_cast_fp16 = linear(bias = model_bert_encoder_layer_2_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_2_intermediate_dense_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor input_51_mode_0 = const()[name = tensor("input_51_mode_0"), val = tensor("EXACT")]; tensor input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = linear_16_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor model_bert_encoder_layer_2_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82741376)))]; tensor model_bert_encoder_layer_2_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87460032)))]; tensor linear_17_cast_fp16 = linear(bias = model_bert_encoder_layer_2_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_2_output_dense_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor input_55_cast_fp16 = add(x = linear_17_cast_fp16, y = input_47_cast_fp16)[name = tensor("input_55_cast_fp16")]; tensor hidden_states_19_axes_0 = const()[name = tensor("hidden_states_19_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_2_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87461632)))]; tensor model_bert_encoder_layer_2_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_2_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87463232)))]; tensor hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = model_bert_encoder_layer_2_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_2_output_LayerNorm_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; tensor model_bert_encoder_layer_3_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87464832)))]; tensor model_bert_encoder_layer_3_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88644544)))]; tensor linear_18_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_self_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_319 = const()[name = tensor("op_319"), val = tensor([1, 128, 12, 64])]; tensor x_39_cast_fp16 = reshape(shape = var_319, x = linear_18_cast_fp16)[name = tensor("x_39_cast_fp16")]; tensor model_bert_encoder_layer_3_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88646144)))]; tensor model_bert_encoder_layer_3_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89825856)))]; tensor linear_19_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_self_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor var_328 = const()[name = tensor("op_328"), val = tensor([1, 128, 12, 64])]; tensor x_43_cast_fp16 = reshape(shape = var_328, x = linear_19_cast_fp16)[name = tensor("x_43_cast_fp16")]; tensor model_bert_encoder_layer_3_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89827456)))]; tensor model_bert_encoder_layer_3_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91007168)))]; tensor linear_20_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_self_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_337 = const()[name = tensor("op_337"), val = tensor([1, 128, 12, 64])]; tensor x_47_cast_fp16 = reshape(shape = var_337, x = linear_20_cast_fp16)[name = tensor("x_47_cast_fp16")]; tensor var_339 = const()[name = tensor("op_339"), val = tensor([0, 2, 1, 3])]; tensor mul_3_y_0_to_fp16 = const()[name = tensor("mul_3_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_3_cast_fp16 = mul(x = x_39_cast_fp16, y = mul_3_y_0_to_fp16)[name = tensor("mul_3_cast_fp16")]; tensor matmul_3_transpose_y_0 = const()[name = tensor("matmul_3_transpose_y_0"), val = tensor(true)]; tensor matmul_3_transpose_x_0 = const()[name = tensor("matmul_3_transpose_x_0"), val = tensor(false)]; tensor transpose_54_perm_0 = const()[name = tensor("transpose_54_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_55_perm_0 = const()[name = tensor("transpose_55_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_55 = transpose(perm = transpose_55_perm_0, x = x_43_cast_fp16)[name = tensor("transpose_105")]; tensor transpose_54 = transpose(perm = transpose_54_perm_0, x = mul_3_cast_fp16)[name = tensor("transpose_106")]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = transpose_54, y = transpose_55)[name = tensor("matmul_3_cast_fp16")]; tensor add_3_cast_fp16 = add(x = matmul_3_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_3_cast_fp16")]; tensor softmax_3_axis_0 = const()[name = tensor("softmax_3_axis_0"), val = tensor(-1)]; tensor softmax_3_cast_fp16 = softmax(axis = softmax_3_axis_0, x = add_3_cast_fp16)[name = tensor("softmax_3_cast_fp16")]; tensor attn_output_13_transpose_x_0 = const()[name = tensor("attn_output_13_transpose_x_0"), val = tensor(false)]; tensor attn_output_13_transpose_y_0 = const()[name = tensor("attn_output_13_transpose_y_0"), val = tensor(false)]; tensor value_layer_7_cast_fp16 = transpose(perm = var_339, x = x_47_cast_fp16)[name = tensor("transpose_107")]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = softmax_3_cast_fp16, y = value_layer_7_cast_fp16)[name = tensor("attn_output_13_cast_fp16")]; tensor attn_output_15_perm_0 = const()[name = tensor("attn_output_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_343 = const()[name = tensor("op_343"), val = tensor([1, 128, 768])]; tensor attn_output_15_cast_fp16 = transpose(perm = attn_output_15_perm_0, x = attn_output_13_cast_fp16)[name = tensor("transpose_104")]; tensor input_57_cast_fp16 = reshape(shape = var_343, x = attn_output_15_cast_fp16)[name = tensor("input_57_cast_fp16")]; tensor model_bert_encoder_layer_3_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91008768)))]; tensor model_bert_encoder_layer_3_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92188480)))]; tensor linear_21_cast_fp16 = linear(bias = model_bert_encoder_layer_3_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_3_attention_output_dense_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor input_61_cast_fp16 = add(x = linear_21_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor input_63_axes_0 = const()[name = tensor("input_63_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92190080)))]; tensor model_bert_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92191680)))]; tensor input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = model_bert_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor model_bert_encoder_layer_3_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92193280)))]; tensor model_bert_encoder_layer_3_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96911936)))]; tensor linear_22_cast_fp16 = linear(bias = model_bert_encoder_layer_3_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_3_intermediate_dense_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor input_67_mode_0 = const()[name = tensor("input_67_mode_0"), val = tensor("EXACT")]; tensor input_67_cast_fp16 = gelu(mode = input_67_mode_0, x = linear_22_cast_fp16)[name = tensor("input_67_cast_fp16")]; tensor model_bert_encoder_layer_3_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96918144)))]; tensor model_bert_encoder_layer_3_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101636800)))]; tensor linear_23_cast_fp16 = linear(bias = model_bert_encoder_layer_3_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_3_output_dense_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor input_71_cast_fp16 = add(x = linear_23_cast_fp16, y = input_63_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor hidden_states_25_axes_0 = const()[name = tensor("hidden_states_25_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_3_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101638400)))]; tensor model_bert_encoder_layer_3_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_3_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101640000)))]; tensor hidden_states_25_cast_fp16 = layer_norm(axes = hidden_states_25_axes_0, beta = model_bert_encoder_layer_3_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_3_output_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; tensor model_bert_encoder_layer_4_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101641600)))]; tensor model_bert_encoder_layer_4_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102821312)))]; tensor linear_24_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_self_query_weight_to_fp16, x = hidden_states_25_cast_fp16)[name = tensor("linear_24_cast_fp16")]; tensor var_387 = const()[name = tensor("op_387"), val = tensor([1, 128, 12, 64])]; tensor x_51_cast_fp16 = reshape(shape = var_387, x = linear_24_cast_fp16)[name = tensor("x_51_cast_fp16")]; tensor model_bert_encoder_layer_4_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102822912)))]; tensor model_bert_encoder_layer_4_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104002624)))]; tensor linear_25_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_self_key_weight_to_fp16, x = hidden_states_25_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor var_396 = const()[name = tensor("op_396"), val = tensor([1, 128, 12, 64])]; tensor x_55_cast_fp16 = reshape(shape = var_396, x = linear_25_cast_fp16)[name = tensor("x_55_cast_fp16")]; tensor model_bert_encoder_layer_4_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104004224)))]; tensor model_bert_encoder_layer_4_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105183936)))]; tensor linear_26_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_self_value_weight_to_fp16, x = hidden_states_25_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor var_405 = const()[name = tensor("op_405"), val = tensor([1, 128, 12, 64])]; tensor x_59_cast_fp16 = reshape(shape = var_405, x = linear_26_cast_fp16)[name = tensor("x_59_cast_fp16")]; tensor var_407 = const()[name = tensor("op_407"), val = tensor([0, 2, 1, 3])]; tensor mul_4_y_0_to_fp16 = const()[name = tensor("mul_4_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_4_cast_fp16 = mul(x = x_51_cast_fp16, y = mul_4_y_0_to_fp16)[name = tensor("mul_4_cast_fp16")]; tensor matmul_4_transpose_y_0 = const()[name = tensor("matmul_4_transpose_y_0"), val = tensor(true)]; tensor matmul_4_transpose_x_0 = const()[name = tensor("matmul_4_transpose_x_0"), val = tensor(false)]; tensor transpose_56_perm_0 = const()[name = tensor("transpose_56_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_57_perm_0 = const()[name = tensor("transpose_57_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_57 = transpose(perm = transpose_57_perm_0, x = x_55_cast_fp16)[name = tensor("transpose_101")]; tensor transpose_56 = transpose(perm = transpose_56_perm_0, x = mul_4_cast_fp16)[name = tensor("transpose_102")]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = transpose_56, y = transpose_57)[name = tensor("matmul_4_cast_fp16")]; tensor add_4_cast_fp16 = add(x = matmul_4_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_4_cast_fp16")]; tensor softmax_4_axis_0 = const()[name = tensor("softmax_4_axis_0"), val = tensor(-1)]; tensor softmax_4_cast_fp16 = softmax(axis = softmax_4_axis_0, x = add_4_cast_fp16)[name = tensor("softmax_4_cast_fp16")]; tensor attn_output_17_transpose_x_0 = const()[name = tensor("attn_output_17_transpose_x_0"), val = tensor(false)]; tensor attn_output_17_transpose_y_0 = const()[name = tensor("attn_output_17_transpose_y_0"), val = tensor(false)]; tensor value_layer_9_cast_fp16 = transpose(perm = var_407, x = x_59_cast_fp16)[name = tensor("transpose_103")]; tensor attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = softmax_4_cast_fp16, y = value_layer_9_cast_fp16)[name = tensor("attn_output_17_cast_fp16")]; tensor attn_output_19_perm_0 = const()[name = tensor("attn_output_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_411 = const()[name = tensor("op_411"), val = tensor([1, 128, 768])]; tensor attn_output_19_cast_fp16 = transpose(perm = attn_output_19_perm_0, x = attn_output_17_cast_fp16)[name = tensor("transpose_100")]; tensor input_73_cast_fp16 = reshape(shape = var_411, x = attn_output_19_cast_fp16)[name = tensor("input_73_cast_fp16")]; tensor model_bert_encoder_layer_4_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105185536)))]; tensor model_bert_encoder_layer_4_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106365248)))]; tensor linear_27_cast_fp16 = linear(bias = model_bert_encoder_layer_4_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_4_attention_output_dense_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor input_77_cast_fp16 = add(x = linear_27_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor input_79_axes_0 = const()[name = tensor("input_79_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106366848)))]; tensor model_bert_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106368448)))]; tensor input_79_cast_fp16 = layer_norm(axes = input_79_axes_0, beta = model_bert_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor model_bert_encoder_layer_4_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106370048)))]; tensor model_bert_encoder_layer_4_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111088704)))]; tensor linear_28_cast_fp16 = linear(bias = model_bert_encoder_layer_4_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_4_intermediate_dense_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor input_83_mode_0 = const()[name = tensor("input_83_mode_0"), val = tensor("EXACT")]; tensor input_83_cast_fp16 = gelu(mode = input_83_mode_0, x = linear_28_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor model_bert_encoder_layer_4_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111094912)))]; tensor model_bert_encoder_layer_4_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115813568)))]; tensor linear_29_cast_fp16 = linear(bias = model_bert_encoder_layer_4_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_4_output_dense_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor input_87_cast_fp16 = add(x = linear_29_cast_fp16, y = input_79_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor hidden_states_31_axes_0 = const()[name = tensor("hidden_states_31_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_4_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115815168)))]; tensor model_bert_encoder_layer_4_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_4_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115816768)))]; tensor hidden_states_31_cast_fp16 = layer_norm(axes = hidden_states_31_axes_0, beta = model_bert_encoder_layer_4_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_4_output_LayerNorm_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; tensor model_bert_encoder_layer_5_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115818368)))]; tensor model_bert_encoder_layer_5_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116998080)))]; tensor linear_30_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_self_query_weight_to_fp16, x = hidden_states_31_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_455 = const()[name = tensor("op_455"), val = tensor([1, 128, 12, 64])]; tensor x_63_cast_fp16 = reshape(shape = var_455, x = linear_30_cast_fp16)[name = tensor("x_63_cast_fp16")]; tensor model_bert_encoder_layer_5_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116999680)))]; tensor model_bert_encoder_layer_5_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118179392)))]; tensor linear_31_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_self_key_weight_to_fp16, x = hidden_states_31_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor var_464 = const()[name = tensor("op_464"), val = tensor([1, 128, 12, 64])]; tensor x_67_cast_fp16 = reshape(shape = var_464, x = linear_31_cast_fp16)[name = tensor("x_67_cast_fp16")]; tensor model_bert_encoder_layer_5_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118180992)))]; tensor model_bert_encoder_layer_5_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119360704)))]; tensor linear_32_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_self_value_weight_to_fp16, x = hidden_states_31_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_473 = const()[name = tensor("op_473"), val = tensor([1, 128, 12, 64])]; tensor x_71_cast_fp16 = reshape(shape = var_473, x = linear_32_cast_fp16)[name = tensor("x_71_cast_fp16")]; tensor var_475 = const()[name = tensor("op_475"), val = tensor([0, 2, 1, 3])]; tensor mul_5_y_0_to_fp16 = const()[name = tensor("mul_5_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_5_cast_fp16 = mul(x = x_63_cast_fp16, y = mul_5_y_0_to_fp16)[name = tensor("mul_5_cast_fp16")]; tensor matmul_5_transpose_y_0 = const()[name = tensor("matmul_5_transpose_y_0"), val = tensor(true)]; tensor matmul_5_transpose_x_0 = const()[name = tensor("matmul_5_transpose_x_0"), val = tensor(false)]; tensor transpose_58_perm_0 = const()[name = tensor("transpose_58_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_59_perm_0 = const()[name = tensor("transpose_59_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_59 = transpose(perm = transpose_59_perm_0, x = x_67_cast_fp16)[name = tensor("transpose_97")]; tensor transpose_58 = transpose(perm = transpose_58_perm_0, x = mul_5_cast_fp16)[name = tensor("transpose_98")]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = transpose_58, y = transpose_59)[name = tensor("matmul_5_cast_fp16")]; tensor add_5_cast_fp16 = add(x = matmul_5_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_5_cast_fp16")]; tensor softmax_5_axis_0 = const()[name = tensor("softmax_5_axis_0"), val = tensor(-1)]; tensor softmax_5_cast_fp16 = softmax(axis = softmax_5_axis_0, x = add_5_cast_fp16)[name = tensor("softmax_5_cast_fp16")]; tensor attn_output_21_transpose_x_0 = const()[name = tensor("attn_output_21_transpose_x_0"), val = tensor(false)]; tensor attn_output_21_transpose_y_0 = const()[name = tensor("attn_output_21_transpose_y_0"), val = tensor(false)]; tensor value_layer_11_cast_fp16 = transpose(perm = var_475, x = x_71_cast_fp16)[name = tensor("transpose_99")]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = softmax_5_cast_fp16, y = value_layer_11_cast_fp16)[name = tensor("attn_output_21_cast_fp16")]; tensor attn_output_23_perm_0 = const()[name = tensor("attn_output_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_479 = const()[name = tensor("op_479"), val = tensor([1, 128, 768])]; tensor attn_output_23_cast_fp16 = transpose(perm = attn_output_23_perm_0, x = attn_output_21_cast_fp16)[name = tensor("transpose_96")]; tensor input_89_cast_fp16 = reshape(shape = var_479, x = attn_output_23_cast_fp16)[name = tensor("input_89_cast_fp16")]; tensor model_bert_encoder_layer_5_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119362304)))]; tensor model_bert_encoder_layer_5_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120542016)))]; tensor linear_33_cast_fp16 = linear(bias = model_bert_encoder_layer_5_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_5_attention_output_dense_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("linear_33_cast_fp16")]; tensor input_93_cast_fp16 = add(x = linear_33_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor input_95_axes_0 = const()[name = tensor("input_95_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120543616)))]; tensor model_bert_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120545216)))]; tensor input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, beta = model_bert_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; tensor model_bert_encoder_layer_5_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120546816)))]; tensor model_bert_encoder_layer_5_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125265472)))]; tensor linear_34_cast_fp16 = linear(bias = model_bert_encoder_layer_5_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_5_intermediate_dense_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor input_99_mode_0 = const()[name = tensor("input_99_mode_0"), val = tensor("EXACT")]; tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = linear_34_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor model_bert_encoder_layer_5_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125271680)))]; tensor model_bert_encoder_layer_5_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129990336)))]; tensor linear_35_cast_fp16 = linear(bias = model_bert_encoder_layer_5_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_5_output_dense_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor input_103_cast_fp16 = add(x = linear_35_cast_fp16, y = input_95_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor hidden_states_37_axes_0 = const()[name = tensor("hidden_states_37_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_5_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129991936)))]; tensor model_bert_encoder_layer_5_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_5_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129993536)))]; tensor hidden_states_37_cast_fp16 = layer_norm(axes = hidden_states_37_axes_0, beta = model_bert_encoder_layer_5_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_5_output_LayerNorm_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; tensor model_bert_encoder_layer_6_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129995136)))]; tensor model_bert_encoder_layer_6_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131174848)))]; tensor linear_36_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_self_query_weight_to_fp16, x = hidden_states_37_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor var_523 = const()[name = tensor("op_523"), val = tensor([1, 128, 12, 64])]; tensor x_75_cast_fp16 = reshape(shape = var_523, x = linear_36_cast_fp16)[name = tensor("x_75_cast_fp16")]; tensor model_bert_encoder_layer_6_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131176448)))]; tensor model_bert_encoder_layer_6_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132356160)))]; tensor linear_37_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_self_key_weight_to_fp16, x = hidden_states_37_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor var_532 = const()[name = tensor("op_532"), val = tensor([1, 128, 12, 64])]; tensor x_79_cast_fp16 = reshape(shape = var_532, x = linear_37_cast_fp16)[name = tensor("x_79_cast_fp16")]; tensor model_bert_encoder_layer_6_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132357760)))]; tensor model_bert_encoder_layer_6_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133537472)))]; tensor linear_38_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_self_value_weight_to_fp16, x = hidden_states_37_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_541 = const()[name = tensor("op_541"), val = tensor([1, 128, 12, 64])]; tensor x_83_cast_fp16 = reshape(shape = var_541, x = linear_38_cast_fp16)[name = tensor("x_83_cast_fp16")]; tensor var_543 = const()[name = tensor("op_543"), val = tensor([0, 2, 1, 3])]; tensor mul_6_y_0_to_fp16 = const()[name = tensor("mul_6_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_6_cast_fp16 = mul(x = x_75_cast_fp16, y = mul_6_y_0_to_fp16)[name = tensor("mul_6_cast_fp16")]; tensor matmul_6_transpose_y_0 = const()[name = tensor("matmul_6_transpose_y_0"), val = tensor(true)]; tensor matmul_6_transpose_x_0 = const()[name = tensor("matmul_6_transpose_x_0"), val = tensor(false)]; tensor transpose_60_perm_0 = const()[name = tensor("transpose_60_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_61_perm_0 = const()[name = tensor("transpose_61_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_61 = transpose(perm = transpose_61_perm_0, x = x_79_cast_fp16)[name = tensor("transpose_93")]; tensor transpose_60 = transpose(perm = transpose_60_perm_0, x = mul_6_cast_fp16)[name = tensor("transpose_94")]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = transpose_60, y = transpose_61)[name = tensor("matmul_6_cast_fp16")]; tensor add_6_cast_fp16 = add(x = matmul_6_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_6_cast_fp16")]; tensor softmax_6_axis_0 = const()[name = tensor("softmax_6_axis_0"), val = tensor(-1)]; tensor softmax_6_cast_fp16 = softmax(axis = softmax_6_axis_0, x = add_6_cast_fp16)[name = tensor("softmax_6_cast_fp16")]; tensor attn_output_25_transpose_x_0 = const()[name = tensor("attn_output_25_transpose_x_0"), val = tensor(false)]; tensor attn_output_25_transpose_y_0 = const()[name = tensor("attn_output_25_transpose_y_0"), val = tensor(false)]; tensor value_layer_13_cast_fp16 = transpose(perm = var_543, x = x_83_cast_fp16)[name = tensor("transpose_95")]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = softmax_6_cast_fp16, y = value_layer_13_cast_fp16)[name = tensor("attn_output_25_cast_fp16")]; tensor attn_output_27_perm_0 = const()[name = tensor("attn_output_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_547 = const()[name = tensor("op_547"), val = tensor([1, 128, 768])]; tensor attn_output_27_cast_fp16 = transpose(perm = attn_output_27_perm_0, x = attn_output_25_cast_fp16)[name = tensor("transpose_92")]; tensor input_105_cast_fp16 = reshape(shape = var_547, x = attn_output_27_cast_fp16)[name = tensor("input_105_cast_fp16")]; tensor model_bert_encoder_layer_6_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133539072)))]; tensor model_bert_encoder_layer_6_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134718784)))]; tensor linear_39_cast_fp16 = linear(bias = model_bert_encoder_layer_6_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_6_attention_output_dense_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor input_109_cast_fp16 = add(x = linear_39_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor input_111_axes_0 = const()[name = tensor("input_111_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_6_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134720384)))]; tensor model_bert_encoder_layer_6_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134721984)))]; tensor input_111_cast_fp16 = layer_norm(axes = input_111_axes_0, beta = model_bert_encoder_layer_6_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_6_attention_output_LayerNorm_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor model_bert_encoder_layer_6_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134723584)))]; tensor model_bert_encoder_layer_6_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139442240)))]; tensor linear_40_cast_fp16 = linear(bias = model_bert_encoder_layer_6_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_6_intermediate_dense_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor input_115_mode_0 = const()[name = tensor("input_115_mode_0"), val = tensor("EXACT")]; tensor input_115_cast_fp16 = gelu(mode = input_115_mode_0, x = linear_40_cast_fp16)[name = tensor("input_115_cast_fp16")]; tensor model_bert_encoder_layer_6_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139448448)))]; tensor model_bert_encoder_layer_6_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144167104)))]; tensor linear_41_cast_fp16 = linear(bias = model_bert_encoder_layer_6_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_6_output_dense_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor input_119_cast_fp16 = add(x = linear_41_cast_fp16, y = input_111_cast_fp16)[name = tensor("input_119_cast_fp16")]; tensor hidden_states_43_axes_0 = const()[name = tensor("hidden_states_43_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_6_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144168704)))]; tensor model_bert_encoder_layer_6_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_6_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144170304)))]; tensor hidden_states_43_cast_fp16 = layer_norm(axes = hidden_states_43_axes_0, beta = model_bert_encoder_layer_6_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_6_output_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; tensor model_bert_encoder_layer_7_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144171904)))]; tensor model_bert_encoder_layer_7_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145351616)))]; tensor linear_42_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_self_query_weight_to_fp16, x = hidden_states_43_cast_fp16)[name = tensor("linear_42_cast_fp16")]; tensor var_591 = const()[name = tensor("op_591"), val = tensor([1, 128, 12, 64])]; tensor x_87_cast_fp16 = reshape(shape = var_591, x = linear_42_cast_fp16)[name = tensor("x_87_cast_fp16")]; tensor model_bert_encoder_layer_7_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145353216)))]; tensor model_bert_encoder_layer_7_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146532928)))]; tensor linear_43_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_self_key_weight_to_fp16, x = hidden_states_43_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor var_600 = const()[name = tensor("op_600"), val = tensor([1, 128, 12, 64])]; tensor x_91_cast_fp16 = reshape(shape = var_600, x = linear_43_cast_fp16)[name = tensor("x_91_cast_fp16")]; tensor model_bert_encoder_layer_7_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146534528)))]; tensor model_bert_encoder_layer_7_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147714240)))]; tensor linear_44_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_self_value_weight_to_fp16, x = hidden_states_43_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor var_609 = const()[name = tensor("op_609"), val = tensor([1, 128, 12, 64])]; tensor x_95_cast_fp16 = reshape(shape = var_609, x = linear_44_cast_fp16)[name = tensor("x_95_cast_fp16")]; tensor var_611 = const()[name = tensor("op_611"), val = tensor([0, 2, 1, 3])]; tensor mul_7_y_0_to_fp16 = const()[name = tensor("mul_7_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_7_cast_fp16 = mul(x = x_87_cast_fp16, y = mul_7_y_0_to_fp16)[name = tensor("mul_7_cast_fp16")]; tensor matmul_7_transpose_y_0 = const()[name = tensor("matmul_7_transpose_y_0"), val = tensor(true)]; tensor matmul_7_transpose_x_0 = const()[name = tensor("matmul_7_transpose_x_0"), val = tensor(false)]; tensor transpose_62_perm_0 = const()[name = tensor("transpose_62_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_63_perm_0 = const()[name = tensor("transpose_63_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_63 = transpose(perm = transpose_63_perm_0, x = x_91_cast_fp16)[name = tensor("transpose_89")]; tensor transpose_62 = transpose(perm = transpose_62_perm_0, x = mul_7_cast_fp16)[name = tensor("transpose_90")]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = transpose_62, y = transpose_63)[name = tensor("matmul_7_cast_fp16")]; tensor add_7_cast_fp16 = add(x = matmul_7_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_7_cast_fp16")]; tensor softmax_7_axis_0 = const()[name = tensor("softmax_7_axis_0"), val = tensor(-1)]; tensor softmax_7_cast_fp16 = softmax(axis = softmax_7_axis_0, x = add_7_cast_fp16)[name = tensor("softmax_7_cast_fp16")]; tensor attn_output_29_transpose_x_0 = const()[name = tensor("attn_output_29_transpose_x_0"), val = tensor(false)]; tensor attn_output_29_transpose_y_0 = const()[name = tensor("attn_output_29_transpose_y_0"), val = tensor(false)]; tensor value_layer_15_cast_fp16 = transpose(perm = var_611, x = x_95_cast_fp16)[name = tensor("transpose_91")]; tensor attn_output_29_cast_fp16 = matmul(transpose_x = attn_output_29_transpose_x_0, transpose_y = attn_output_29_transpose_y_0, x = softmax_7_cast_fp16, y = value_layer_15_cast_fp16)[name = tensor("attn_output_29_cast_fp16")]; tensor attn_output_31_perm_0 = const()[name = tensor("attn_output_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_615 = const()[name = tensor("op_615"), val = tensor([1, 128, 768])]; tensor attn_output_31_cast_fp16 = transpose(perm = attn_output_31_perm_0, x = attn_output_29_cast_fp16)[name = tensor("transpose_88")]; tensor input_121_cast_fp16 = reshape(shape = var_615, x = attn_output_31_cast_fp16)[name = tensor("input_121_cast_fp16")]; tensor model_bert_encoder_layer_7_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147715840)))]; tensor model_bert_encoder_layer_7_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148895552)))]; tensor linear_45_cast_fp16 = linear(bias = model_bert_encoder_layer_7_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_7_attention_output_dense_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor input_125_cast_fp16 = add(x = linear_45_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor("input_125_cast_fp16")]; tensor input_127_axes_0 = const()[name = tensor("input_127_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_7_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148897152)))]; tensor model_bert_encoder_layer_7_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148898752)))]; tensor input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = model_bert_encoder_layer_7_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_7_attention_output_LayerNorm_weight_to_fp16, x = input_125_cast_fp16)[name = tensor("input_127_cast_fp16")]; tensor model_bert_encoder_layer_7_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148900352)))]; tensor model_bert_encoder_layer_7_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153619008)))]; tensor linear_46_cast_fp16 = linear(bias = model_bert_encoder_layer_7_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_7_intermediate_dense_weight_to_fp16, x = input_127_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor input_131_mode_0 = const()[name = tensor("input_131_mode_0"), val = tensor("EXACT")]; tensor input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = linear_46_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor model_bert_encoder_layer_7_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153625216)))]; tensor model_bert_encoder_layer_7_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158343872)))]; tensor linear_47_cast_fp16 = linear(bias = model_bert_encoder_layer_7_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_7_output_dense_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor input_135_cast_fp16 = add(x = linear_47_cast_fp16, y = input_127_cast_fp16)[name = tensor("input_135_cast_fp16")]; tensor hidden_states_49_axes_0 = const()[name = tensor("hidden_states_49_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_7_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158345472)))]; tensor model_bert_encoder_layer_7_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_7_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158347072)))]; tensor hidden_states_49_cast_fp16 = layer_norm(axes = hidden_states_49_axes_0, beta = model_bert_encoder_layer_7_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_7_output_LayerNorm_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; tensor model_bert_encoder_layer_8_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158348672)))]; tensor model_bert_encoder_layer_8_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159528384)))]; tensor linear_48_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_self_query_weight_to_fp16, x = hidden_states_49_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_659 = const()[name = tensor("op_659"), val = tensor([1, 128, 12, 64])]; tensor x_99_cast_fp16 = reshape(shape = var_659, x = linear_48_cast_fp16)[name = tensor("x_99_cast_fp16")]; tensor model_bert_encoder_layer_8_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159529984)))]; tensor model_bert_encoder_layer_8_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160709696)))]; tensor linear_49_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_self_key_weight_to_fp16, x = hidden_states_49_cast_fp16)[name = tensor("linear_49_cast_fp16")]; tensor var_668 = const()[name = tensor("op_668"), val = tensor([1, 128, 12, 64])]; tensor x_103_cast_fp16 = reshape(shape = var_668, x = linear_49_cast_fp16)[name = tensor("x_103_cast_fp16")]; tensor model_bert_encoder_layer_8_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160711296)))]; tensor model_bert_encoder_layer_8_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161891008)))]; tensor linear_50_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_self_value_weight_to_fp16, x = hidden_states_49_cast_fp16)[name = tensor("linear_50_cast_fp16")]; tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 128, 12, 64])]; tensor x_107_cast_fp16 = reshape(shape = var_677, x = linear_50_cast_fp16)[name = tensor("x_107_cast_fp16")]; tensor var_679 = const()[name = tensor("op_679"), val = tensor([0, 2, 1, 3])]; tensor mul_8_y_0_to_fp16 = const()[name = tensor("mul_8_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_8_cast_fp16 = mul(x = x_99_cast_fp16, y = mul_8_y_0_to_fp16)[name = tensor("mul_8_cast_fp16")]; tensor matmul_8_transpose_y_0 = const()[name = tensor("matmul_8_transpose_y_0"), val = tensor(true)]; tensor matmul_8_transpose_x_0 = const()[name = tensor("matmul_8_transpose_x_0"), val = tensor(false)]; tensor transpose_64_perm_0 = const()[name = tensor("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_65_perm_0 = const()[name = tensor("transpose_65_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_65 = transpose(perm = transpose_65_perm_0, x = x_103_cast_fp16)[name = tensor("transpose_85")]; tensor transpose_64 = transpose(perm = transpose_64_perm_0, x = mul_8_cast_fp16)[name = tensor("transpose_86")]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = transpose_64, y = transpose_65)[name = tensor("matmul_8_cast_fp16")]; tensor add_8_cast_fp16 = add(x = matmul_8_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_8_cast_fp16")]; tensor softmax_8_axis_0 = const()[name = tensor("softmax_8_axis_0"), val = tensor(-1)]; tensor softmax_8_cast_fp16 = softmax(axis = softmax_8_axis_0, x = add_8_cast_fp16)[name = tensor("softmax_8_cast_fp16")]; tensor attn_output_33_transpose_x_0 = const()[name = tensor("attn_output_33_transpose_x_0"), val = tensor(false)]; tensor attn_output_33_transpose_y_0 = const()[name = tensor("attn_output_33_transpose_y_0"), val = tensor(false)]; tensor value_layer_17_cast_fp16 = transpose(perm = var_679, x = x_107_cast_fp16)[name = tensor("transpose_87")]; tensor attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = softmax_8_cast_fp16, y = value_layer_17_cast_fp16)[name = tensor("attn_output_33_cast_fp16")]; tensor attn_output_35_perm_0 = const()[name = tensor("attn_output_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_683 = const()[name = tensor("op_683"), val = tensor([1, 128, 768])]; tensor attn_output_35_cast_fp16 = transpose(perm = attn_output_35_perm_0, x = attn_output_33_cast_fp16)[name = tensor("transpose_84")]; tensor input_137_cast_fp16 = reshape(shape = var_683, x = attn_output_35_cast_fp16)[name = tensor("input_137_cast_fp16")]; tensor model_bert_encoder_layer_8_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161892608)))]; tensor model_bert_encoder_layer_8_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163072320)))]; tensor linear_51_cast_fp16 = linear(bias = model_bert_encoder_layer_8_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_8_attention_output_dense_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("linear_51_cast_fp16")]; tensor input_141_cast_fp16 = add(x = linear_51_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor("input_141_cast_fp16")]; tensor input_143_axes_0 = const()[name = tensor("input_143_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_8_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163073920)))]; tensor model_bert_encoder_layer_8_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163075520)))]; tensor input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = model_bert_encoder_layer_8_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_8_attention_output_LayerNorm_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor model_bert_encoder_layer_8_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163077120)))]; tensor model_bert_encoder_layer_8_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167795776)))]; tensor linear_52_cast_fp16 = linear(bias = model_bert_encoder_layer_8_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_8_intermediate_dense_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("linear_52_cast_fp16")]; tensor input_147_mode_0 = const()[name = tensor("input_147_mode_0"), val = tensor("EXACT")]; tensor input_147_cast_fp16 = gelu(mode = input_147_mode_0, x = linear_52_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor model_bert_encoder_layer_8_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167801984)))]; tensor model_bert_encoder_layer_8_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172520640)))]; tensor linear_53_cast_fp16 = linear(bias = model_bert_encoder_layer_8_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_8_output_dense_weight_to_fp16, x = input_147_cast_fp16)[name = tensor("linear_53_cast_fp16")]; tensor input_151_cast_fp16 = add(x = linear_53_cast_fp16, y = input_143_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor hidden_states_55_axes_0 = const()[name = tensor("hidden_states_55_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_8_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172522240)))]; tensor model_bert_encoder_layer_8_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_8_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172523840)))]; tensor hidden_states_55_cast_fp16 = layer_norm(axes = hidden_states_55_axes_0, beta = model_bert_encoder_layer_8_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_8_output_LayerNorm_weight_to_fp16, x = input_151_cast_fp16)[name = tensor("hidden_states_55_cast_fp16")]; tensor model_bert_encoder_layer_9_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172525440)))]; tensor model_bert_encoder_layer_9_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173705152)))]; tensor linear_54_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_self_query_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = tensor("linear_54_cast_fp16")]; tensor var_727 = const()[name = tensor("op_727"), val = tensor([1, 128, 12, 64])]; tensor x_111_cast_fp16 = reshape(shape = var_727, x = linear_54_cast_fp16)[name = tensor("x_111_cast_fp16")]; tensor model_bert_encoder_layer_9_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173706752)))]; tensor model_bert_encoder_layer_9_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174886464)))]; tensor linear_55_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_self_key_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = tensor("linear_55_cast_fp16")]; tensor var_736 = const()[name = tensor("op_736"), val = tensor([1, 128, 12, 64])]; tensor x_115_cast_fp16 = reshape(shape = var_736, x = linear_55_cast_fp16)[name = tensor("x_115_cast_fp16")]; tensor model_bert_encoder_layer_9_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174888064)))]; tensor model_bert_encoder_layer_9_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176067776)))]; tensor linear_56_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_self_value_weight_to_fp16, x = hidden_states_55_cast_fp16)[name = tensor("linear_56_cast_fp16")]; tensor var_745 = const()[name = tensor("op_745"), val = tensor([1, 128, 12, 64])]; tensor x_119_cast_fp16 = reshape(shape = var_745, x = linear_56_cast_fp16)[name = tensor("x_119_cast_fp16")]; tensor var_747 = const()[name = tensor("op_747"), val = tensor([0, 2, 1, 3])]; tensor mul_9_y_0_to_fp16 = const()[name = tensor("mul_9_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_9_cast_fp16 = mul(x = x_111_cast_fp16, y = mul_9_y_0_to_fp16)[name = tensor("mul_9_cast_fp16")]; tensor matmul_9_transpose_y_0 = const()[name = tensor("matmul_9_transpose_y_0"), val = tensor(true)]; tensor matmul_9_transpose_x_0 = const()[name = tensor("matmul_9_transpose_x_0"), val = tensor(false)]; tensor transpose_66_perm_0 = const()[name = tensor("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_67_perm_0 = const()[name = tensor("transpose_67_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_67 = transpose(perm = transpose_67_perm_0, x = x_115_cast_fp16)[name = tensor("transpose_81")]; tensor transpose_66 = transpose(perm = transpose_66_perm_0, x = mul_9_cast_fp16)[name = tensor("transpose_82")]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = transpose_66, y = transpose_67)[name = tensor("matmul_9_cast_fp16")]; tensor add_9_cast_fp16 = add(x = matmul_9_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_9_cast_fp16")]; tensor softmax_9_axis_0 = const()[name = tensor("softmax_9_axis_0"), val = tensor(-1)]; tensor softmax_9_cast_fp16 = softmax(axis = softmax_9_axis_0, x = add_9_cast_fp16)[name = tensor("softmax_9_cast_fp16")]; tensor attn_output_37_transpose_x_0 = const()[name = tensor("attn_output_37_transpose_x_0"), val = tensor(false)]; tensor attn_output_37_transpose_y_0 = const()[name = tensor("attn_output_37_transpose_y_0"), val = tensor(false)]; tensor value_layer_19_cast_fp16 = transpose(perm = var_747, x = x_119_cast_fp16)[name = tensor("transpose_83")]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = softmax_9_cast_fp16, y = value_layer_19_cast_fp16)[name = tensor("attn_output_37_cast_fp16")]; tensor attn_output_39_perm_0 = const()[name = tensor("attn_output_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_751 = const()[name = tensor("op_751"), val = tensor([1, 128, 768])]; tensor attn_output_39_cast_fp16 = transpose(perm = attn_output_39_perm_0, x = attn_output_37_cast_fp16)[name = tensor("transpose_80")]; tensor input_153_cast_fp16 = reshape(shape = var_751, x = attn_output_39_cast_fp16)[name = tensor("input_153_cast_fp16")]; tensor model_bert_encoder_layer_9_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176069376)))]; tensor model_bert_encoder_layer_9_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177249088)))]; tensor linear_57_cast_fp16 = linear(bias = model_bert_encoder_layer_9_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_9_attention_output_dense_weight_to_fp16, x = input_153_cast_fp16)[name = tensor("linear_57_cast_fp16")]; tensor input_157_cast_fp16 = add(x = linear_57_cast_fp16, y = hidden_states_55_cast_fp16)[name = tensor("input_157_cast_fp16")]; tensor input_159_axes_0 = const()[name = tensor("input_159_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_9_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177250688)))]; tensor model_bert_encoder_layer_9_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177252288)))]; tensor input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, beta = model_bert_encoder_layer_9_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_9_attention_output_LayerNorm_weight_to_fp16, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; tensor model_bert_encoder_layer_9_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177253888)))]; tensor model_bert_encoder_layer_9_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181972544)))]; tensor linear_58_cast_fp16 = linear(bias = model_bert_encoder_layer_9_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_9_intermediate_dense_weight_to_fp16, x = input_159_cast_fp16)[name = tensor("linear_58_cast_fp16")]; tensor input_163_mode_0 = const()[name = tensor("input_163_mode_0"), val = tensor("EXACT")]; tensor input_163_cast_fp16 = gelu(mode = input_163_mode_0, x = linear_58_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor model_bert_encoder_layer_9_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181978752)))]; tensor model_bert_encoder_layer_9_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186697408)))]; tensor linear_59_cast_fp16 = linear(bias = model_bert_encoder_layer_9_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_9_output_dense_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("linear_59_cast_fp16")]; tensor input_167_cast_fp16 = add(x = linear_59_cast_fp16, y = input_159_cast_fp16)[name = tensor("input_167_cast_fp16")]; tensor hidden_states_61_axes_0 = const()[name = tensor("hidden_states_61_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_9_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186699008)))]; tensor model_bert_encoder_layer_9_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_9_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186700608)))]; tensor hidden_states_61_cast_fp16 = layer_norm(axes = hidden_states_61_axes_0, beta = model_bert_encoder_layer_9_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_9_output_LayerNorm_weight_to_fp16, x = input_167_cast_fp16)[name = tensor("hidden_states_61_cast_fp16")]; tensor model_bert_encoder_layer_10_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186702208)))]; tensor model_bert_encoder_layer_10_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187881920)))]; tensor linear_60_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_self_query_weight_to_fp16, x = hidden_states_61_cast_fp16)[name = tensor("linear_60_cast_fp16")]; tensor var_795 = const()[name = tensor("op_795"), val = tensor([1, 128, 12, 64])]; tensor x_123_cast_fp16 = reshape(shape = var_795, x = linear_60_cast_fp16)[name = tensor("x_123_cast_fp16")]; tensor model_bert_encoder_layer_10_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187883520)))]; tensor model_bert_encoder_layer_10_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189063232)))]; tensor linear_61_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_self_key_weight_to_fp16, x = hidden_states_61_cast_fp16)[name = tensor("linear_61_cast_fp16")]; tensor var_804 = const()[name = tensor("op_804"), val = tensor([1, 128, 12, 64])]; tensor x_127_cast_fp16 = reshape(shape = var_804, x = linear_61_cast_fp16)[name = tensor("x_127_cast_fp16")]; tensor model_bert_encoder_layer_10_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189064832)))]; tensor model_bert_encoder_layer_10_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190244544)))]; tensor linear_62_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_self_value_weight_to_fp16, x = hidden_states_61_cast_fp16)[name = tensor("linear_62_cast_fp16")]; tensor var_813 = const()[name = tensor("op_813"), val = tensor([1, 128, 12, 64])]; tensor x_131_cast_fp16 = reshape(shape = var_813, x = linear_62_cast_fp16)[name = tensor("x_131_cast_fp16")]; tensor var_815 = const()[name = tensor("op_815"), val = tensor([0, 2, 1, 3])]; tensor mul_10_y_0_to_fp16 = const()[name = tensor("mul_10_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_10_cast_fp16 = mul(x = x_123_cast_fp16, y = mul_10_y_0_to_fp16)[name = tensor("mul_10_cast_fp16")]; tensor matmul_10_transpose_y_0 = const()[name = tensor("matmul_10_transpose_y_0"), val = tensor(true)]; tensor matmul_10_transpose_x_0 = const()[name = tensor("matmul_10_transpose_x_0"), val = tensor(false)]; tensor transpose_68_perm_0 = const()[name = tensor("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_69_perm_0 = const()[name = tensor("transpose_69_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_69 = transpose(perm = transpose_69_perm_0, x = x_127_cast_fp16)[name = tensor("transpose_77")]; tensor transpose_68 = transpose(perm = transpose_68_perm_0, x = mul_10_cast_fp16)[name = tensor("transpose_78")]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = transpose_68, y = transpose_69)[name = tensor("matmul_10_cast_fp16")]; tensor add_10_cast_fp16 = add(x = matmul_10_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_10_cast_fp16")]; tensor softmax_10_axis_0 = const()[name = tensor("softmax_10_axis_0"), val = tensor(-1)]; tensor softmax_10_cast_fp16 = softmax(axis = softmax_10_axis_0, x = add_10_cast_fp16)[name = tensor("softmax_10_cast_fp16")]; tensor attn_output_41_transpose_x_0 = const()[name = tensor("attn_output_41_transpose_x_0"), val = tensor(false)]; tensor attn_output_41_transpose_y_0 = const()[name = tensor("attn_output_41_transpose_y_0"), val = tensor(false)]; tensor value_layer_21_cast_fp16 = transpose(perm = var_815, x = x_131_cast_fp16)[name = tensor("transpose_79")]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = softmax_10_cast_fp16, y = value_layer_21_cast_fp16)[name = tensor("attn_output_41_cast_fp16")]; tensor attn_output_43_perm_0 = const()[name = tensor("attn_output_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_819 = const()[name = tensor("op_819"), val = tensor([1, 128, 768])]; tensor attn_output_43_cast_fp16 = transpose(perm = attn_output_43_perm_0, x = attn_output_41_cast_fp16)[name = tensor("transpose_76")]; tensor input_169_cast_fp16 = reshape(shape = var_819, x = attn_output_43_cast_fp16)[name = tensor("input_169_cast_fp16")]; tensor model_bert_encoder_layer_10_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190246144)))]; tensor model_bert_encoder_layer_10_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191425856)))]; tensor linear_63_cast_fp16 = linear(bias = model_bert_encoder_layer_10_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_10_attention_output_dense_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("linear_63_cast_fp16")]; tensor input_173_cast_fp16 = add(x = linear_63_cast_fp16, y = hidden_states_61_cast_fp16)[name = tensor("input_173_cast_fp16")]; tensor input_175_axes_0 = const()[name = tensor("input_175_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_10_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191427456)))]; tensor model_bert_encoder_layer_10_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191429056)))]; tensor input_175_cast_fp16 = layer_norm(axes = input_175_axes_0, beta = model_bert_encoder_layer_10_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_10_attention_output_LayerNorm_weight_to_fp16, x = input_173_cast_fp16)[name = tensor("input_175_cast_fp16")]; tensor model_bert_encoder_layer_10_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191430656)))]; tensor model_bert_encoder_layer_10_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196149312)))]; tensor linear_64_cast_fp16 = linear(bias = model_bert_encoder_layer_10_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_10_intermediate_dense_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("linear_64_cast_fp16")]; tensor input_179_mode_0 = const()[name = tensor("input_179_mode_0"), val = tensor("EXACT")]; tensor input_179_cast_fp16 = gelu(mode = input_179_mode_0, x = linear_64_cast_fp16)[name = tensor("input_179_cast_fp16")]; tensor model_bert_encoder_layer_10_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196155520)))]; tensor model_bert_encoder_layer_10_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200874176)))]; tensor linear_65_cast_fp16 = linear(bias = model_bert_encoder_layer_10_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_10_output_dense_weight_to_fp16, x = input_179_cast_fp16)[name = tensor("linear_65_cast_fp16")]; tensor input_183_cast_fp16 = add(x = linear_65_cast_fp16, y = input_175_cast_fp16)[name = tensor("input_183_cast_fp16")]; tensor hidden_states_67_axes_0 = const()[name = tensor("hidden_states_67_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_10_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200875776)))]; tensor model_bert_encoder_layer_10_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_10_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200877376)))]; tensor hidden_states_67_cast_fp16 = layer_norm(axes = hidden_states_67_axes_0, beta = model_bert_encoder_layer_10_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_10_output_LayerNorm_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("hidden_states_67_cast_fp16")]; tensor model_bert_encoder_layer_11_attention_self_query_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_self_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200878976)))]; tensor model_bert_encoder_layer_11_attention_self_query_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_self_query_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202058688)))]; tensor linear_66_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_self_query_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_self_query_weight_to_fp16, x = hidden_states_67_cast_fp16)[name = tensor("linear_66_cast_fp16")]; tensor var_863 = const()[name = tensor("op_863"), val = tensor([1, 128, 12, 64])]; tensor x_135_cast_fp16 = reshape(shape = var_863, x = linear_66_cast_fp16)[name = tensor("x_135_cast_fp16")]; tensor model_bert_encoder_layer_11_attention_self_key_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_self_key_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(202060288)))]; tensor model_bert_encoder_layer_11_attention_self_key_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_self_key_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203240000)))]; tensor linear_67_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_self_key_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_self_key_weight_to_fp16, x = hidden_states_67_cast_fp16)[name = tensor("linear_67_cast_fp16")]; tensor var_872 = const()[name = tensor("op_872"), val = tensor([1, 128, 12, 64])]; tensor x_139_cast_fp16 = reshape(shape = var_872, x = linear_67_cast_fp16)[name = tensor("x_139_cast_fp16")]; tensor model_bert_encoder_layer_11_attention_self_value_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_self_value_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203241600)))]; tensor model_bert_encoder_layer_11_attention_self_value_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_self_value_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204421312)))]; tensor linear_68_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_self_value_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_self_value_weight_to_fp16, x = hidden_states_67_cast_fp16)[name = tensor("linear_68_cast_fp16")]; tensor var_881 = const()[name = tensor("op_881"), val = tensor([1, 128, 12, 64])]; tensor x_cast_fp16 = reshape(shape = var_881, x = linear_68_cast_fp16)[name = tensor("x_cast_fp16")]; tensor var_883 = const()[name = tensor("op_883"), val = tensor([0, 2, 1, 3])]; tensor mul_11_y_0_to_fp16 = const()[name = tensor("mul_11_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor mul_11_cast_fp16 = mul(x = x_135_cast_fp16, y = mul_11_y_0_to_fp16)[name = tensor("mul_11_cast_fp16")]; tensor matmul_11_transpose_y_0 = const()[name = tensor("matmul_11_transpose_y_0"), val = tensor(true)]; tensor matmul_11_transpose_x_0 = const()[name = tensor("matmul_11_transpose_x_0"), val = tensor(false)]; tensor transpose_70_perm_0 = const()[name = tensor("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_71_perm_0 = const()[name = tensor("transpose_71_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_71 = transpose(perm = transpose_71_perm_0, x = x_139_cast_fp16)[name = tensor("transpose_73")]; tensor transpose_70 = transpose(perm = transpose_70_perm_0, x = mul_11_cast_fp16)[name = tensor("transpose_74")]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = transpose_70, y = transpose_71)[name = tensor("matmul_11_cast_fp16")]; tensor add_11_cast_fp16 = add(x = matmul_11_cast_fp16, y = attention_mask_cast_fp16)[name = tensor("add_11_cast_fp16")]; tensor softmax_11_axis_0 = const()[name = tensor("softmax_11_axis_0"), val = tensor(-1)]; tensor softmax_11_cast_fp16 = softmax(axis = softmax_11_axis_0, x = add_11_cast_fp16)[name = tensor("softmax_11_cast_fp16")]; tensor attn_output_45_transpose_x_0 = const()[name = tensor("attn_output_45_transpose_x_0"), val = tensor(false)]; tensor attn_output_45_transpose_y_0 = const()[name = tensor("attn_output_45_transpose_y_0"), val = tensor(false)]; tensor value_layer_cast_fp16 = transpose(perm = var_883, x = x_cast_fp16)[name = tensor("transpose_75")]; tensor attn_output_45_cast_fp16 = matmul(transpose_x = attn_output_45_transpose_x_0, transpose_y = attn_output_45_transpose_y_0, x = softmax_11_cast_fp16, y = value_layer_cast_fp16)[name = tensor("attn_output_45_cast_fp16")]; tensor attn_output_perm_0 = const()[name = tensor("attn_output_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_887 = const()[name = tensor("op_887"), val = tensor([1, 128, 768])]; tensor attn_output_cast_fp16 = transpose(perm = attn_output_perm_0, x = attn_output_45_cast_fp16)[name = tensor("transpose_72")]; tensor input_185_cast_fp16 = reshape(shape = var_887, x = attn_output_cast_fp16)[name = tensor("input_185_cast_fp16")]; tensor model_bert_encoder_layer_11_attention_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204422912)))]; tensor model_bert_encoder_layer_11_attention_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205602624)))]; tensor linear_69_cast_fp16 = linear(bias = model_bert_encoder_layer_11_attention_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_11_attention_output_dense_weight_to_fp16, x = input_185_cast_fp16)[name = tensor("linear_69_cast_fp16")]; tensor input_189_cast_fp16 = add(x = linear_69_cast_fp16, y = hidden_states_67_cast_fp16)[name = tensor("input_189_cast_fp16")]; tensor input_191_axes_0 = const()[name = tensor("input_191_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_11_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205604224)))]; tensor model_bert_encoder_layer_11_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_attention_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205605824)))]; tensor input_191_cast_fp16 = layer_norm(axes = input_191_axes_0, beta = model_bert_encoder_layer_11_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_11_attention_output_LayerNorm_weight_to_fp16, x = input_189_cast_fp16)[name = tensor("input_191_cast_fp16")]; tensor model_bert_encoder_layer_11_intermediate_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_intermediate_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205607424)))]; tensor model_bert_encoder_layer_11_intermediate_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_intermediate_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210326080)))]; tensor linear_70_cast_fp16 = linear(bias = model_bert_encoder_layer_11_intermediate_dense_bias_to_fp16, weight = model_bert_encoder_layer_11_intermediate_dense_weight_to_fp16, x = input_191_cast_fp16)[name = tensor("linear_70_cast_fp16")]; tensor input_195_mode_0 = const()[name = tensor("input_195_mode_0"), val = tensor("EXACT")]; tensor input_195_cast_fp16 = gelu(mode = input_195_mode_0, x = linear_70_cast_fp16)[name = tensor("input_195_cast_fp16")]; tensor model_bert_encoder_layer_11_output_dense_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_output_dense_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210332288)))]; tensor model_bert_encoder_layer_11_output_dense_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_output_dense_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215050944)))]; tensor linear_71_cast_fp16 = linear(bias = model_bert_encoder_layer_11_output_dense_bias_to_fp16, weight = model_bert_encoder_layer_11_output_dense_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("linear_71_cast_fp16")]; tensor input_199_cast_fp16 = add(x = linear_71_cast_fp16, y = input_191_cast_fp16)[name = tensor("input_199_cast_fp16")]; tensor input_201_axes_0 = const()[name = tensor("input_201_axes_0"), val = tensor([-1])]; tensor model_bert_encoder_layer_11_output_LayerNorm_weight_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_output_LayerNorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215052544)))]; tensor model_bert_encoder_layer_11_output_LayerNorm_bias_to_fp16 = const()[name = tensor("model_bert_encoder_layer_11_output_LayerNorm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215054144)))]; tensor input_201_cast_fp16 = layer_norm(axes = input_201_axes_0, beta = model_bert_encoder_layer_11_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_bert_encoder_layer_11_output_LayerNorm_weight_to_fp16, x = input_199_cast_fp16)[name = tensor("input_201_cast_fp16")]; tensor model_classifier_weight_to_fp16 = const()[name = tensor("model_classifier_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215055744)))]; tensor model_classifier_bias_to_fp16 = const()[name = tensor("model_classifier_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215128000)))]; tensor linear_72_cast_fp16 = linear(bias = model_classifier_bias_to_fp16, weight = model_classifier_weight_to_fp16, x = input_201_cast_fp16)[name = tensor("linear_72_cast_fp16")]; tensor var_920 = const()[name = tensor("op_920"), val = tensor(-1)]; tensor var_922_cast_fp16 = softmax(axis = var_920, x = linear_72_cast_fp16)[name = tensor("op_922_cast_fp16")]; tensor var_922_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_922_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor token_scores = cast(dtype = var_922_cast_fp16_to_fp32_dtype_0, x = var_922_cast_fp16)[name = tensor("cast_53")]; } -> (token_scores); }