program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})] { func main(tensor melspectrogram_features) { tensor var_54_pad_type_0 = const()[name = tensor("op_54_pad_type_0"), val = tensor("custom")]; tensor var_54_pad_0 = const()[name = tensor("op_54_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_54_strides_0 = const()[name = tensor("op_54_strides_0"), val = tensor([1, 1])]; tensor var_54_dilations_0 = const()[name = tensor("op_54_dilations_0"), val = tensor([1, 1])]; tensor var_54_groups_0 = const()[name = tensor("op_54_groups_0"), val = tensor(1)]; tensor var_29_to_fp16 = const()[name = tensor("op_29_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor var_35_to_fp16 = const()[name = tensor("op_35_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245888)))]; tensor var_54_cast_fp16 = conv(bias = var_35_to_fp16, dilations = var_54_dilations_0, groups = var_54_groups_0, pad = var_54_pad_0, pad_type = var_54_pad_type_0, strides = var_54_strides_0, weight = var_29_to_fp16, x = melspectrogram_features)[name = tensor("op_54_cast_fp16")]; tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_54_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; tensor var_94_pad_type_0 = const()[name = tensor("op_94_pad_type_0"), val = tensor("custom")]; tensor var_94_pad_0 = const()[name = tensor("op_94_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_94_strides_0 = const()[name = tensor("op_94_strides_0"), val = tensor([2, 2])]; tensor var_94_dilations_0 = const()[name = tensor("op_94_dilations_0"), val = tensor([1, 1])]; tensor var_94_groups_0 = const()[name = tensor("op_94_groups_0"), val = tensor(1)]; tensor var_69_to_fp16 = const()[name = tensor("op_69_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246976)))]; tensor var_75_to_fp16 = const()[name = tensor("op_75_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1819904)))]; tensor var_94_cast_fp16 = conv(bias = var_75_to_fp16, dilations = var_94_dilations_0, groups = var_94_groups_0, pad = var_94_pad_0, pad_type = var_94_pad_type_0, strides = var_94_strides_0, weight = var_69_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_94_cast_fp16")]; tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_94_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; tensor var_112_to_fp16 = const()[name = tensor("op_112_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1820992)))]; tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_112_to_fp16)[name = tensor("inputs_1_cast_fp16")]; tensor var_125 = const()[name = tensor("op_125"), val = tensor(3)]; tensor var_138 = const()[name = tensor("op_138"), val = tensor(1)]; tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_155_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3357056)))]; tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3358144)))]; tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3359232)))]; tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3360320)))]; tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("valid")]; tensor query_1_strides_0 = const()[name = tensor("query_1_strides_0"), val = tensor([1, 1])]; tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_1_dilations_0 = const()[name = tensor("query_1_dilations_0"), val = tensor([1, 1])]; tensor query_1_groups_0 = const()[name = tensor("query_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3361408)))]; tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3885760)))]; tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("valid")]; tensor key_1_strides_0 = const()[name = tensor("key_1_strides_0"), val = tensor([1, 1])]; tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_1_dilations_0 = const()[name = tensor("key_1_dilations_0"), val = tensor([1, 1])]; tensor key_1_groups_0 = const()[name = tensor("key_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3886848)))]; tensor key_1_cast_fp16 = conv(dilations = key_1_dilations_0, groups = key_1_groups_0, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("valid")]; tensor value_1_strides_0 = const()[name = tensor("value_1_strides_0"), val = tensor([1, 1])]; tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_1_dilations_0 = const()[name = tensor("value_1_dilations_0"), val = tensor([1, 1])]; tensor value_1_groups_0 = const()[name = tensor("value_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4411200)))]; tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4935552)))]; tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = value_1_dilations_0, groups = value_1_groups_0, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; tensor var_190_begin_0 = const()[name = tensor("op_190_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_190_end_0 = const()[name = tensor("op_190_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_190_end_mask_0 = const()[name = tensor("op_190_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_190_cast_fp16 = slice_by_index(begin = var_190_begin_0, end = var_190_end_0, end_mask = var_190_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_190_cast_fp16")]; tensor var_194_begin_0 = const()[name = tensor("op_194_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_194_end_0 = const()[name = tensor("op_194_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_194_end_mask_0 = const()[name = tensor("op_194_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_194_cast_fp16 = slice_by_index(begin = var_194_begin_0, end = var_194_end_0, end_mask = var_194_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_194_cast_fp16")]; tensor var_198_begin_0 = const()[name = tensor("op_198_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_198_end_0 = const()[name = tensor("op_198_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_198_end_mask_0 = const()[name = tensor("op_198_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_198_cast_fp16 = slice_by_index(begin = var_198_begin_0, end = var_198_end_0, end_mask = var_198_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_198_cast_fp16")]; tensor var_202_begin_0 = const()[name = tensor("op_202_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_202_end_0 = const()[name = tensor("op_202_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_202_end_mask_0 = const()[name = tensor("op_202_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_202_cast_fp16 = slice_by_index(begin = var_202_begin_0, end = var_202_end_0, end_mask = var_202_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_202_cast_fp16")]; tensor var_206_begin_0 = const()[name = tensor("op_206_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_206_end_0 = const()[name = tensor("op_206_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_206_end_mask_0 = const()[name = tensor("op_206_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_206_cast_fp16 = slice_by_index(begin = var_206_begin_0, end = var_206_end_0, end_mask = var_206_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_206_cast_fp16")]; tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_210_cast_fp16")]; tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_214_cast_fp16")]; tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_218_cast_fp16")]; tensor var_221_begin_0 = const()[name = tensor("op_221_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_221_end_0 = const()[name = tensor("op_221_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_221_end_mask_0 = const()[name = tensor("op_221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_221_cast_fp16 = slice_by_index(begin = var_221_begin_0, end = var_221_end_0, end_mask = var_221_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_221_cast_fp16")]; tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_222_cast_fp16")]; tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_223_cast_fp16")]; tensor var_224_begin_0 = const()[name = tensor("op_224_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_224_end_0 = const()[name = tensor("op_224_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_224_end_mask_0 = const()[name = tensor("op_224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_224_cast_fp16 = slice_by_index(begin = var_224_begin_0, end = var_224_end_0, end_mask = var_224_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_224_cast_fp16")]; tensor var_225_begin_0 = const()[name = tensor("op_225_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_225_end_0 = const()[name = tensor("op_225_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_225_end_mask_0 = const()[name = tensor("op_225_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_225_cast_fp16 = slice_by_index(begin = var_225_begin_0, end = var_225_end_0, end_mask = var_225_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_225_cast_fp16")]; tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = var_190_cast_fp16)[name = tensor("op_226_cast_fp16")]; tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_227_cast_fp16")]; tensor var_228_begin_0 = const()[name = tensor("op_228_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_228_end_0 = const()[name = tensor("op_228_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_228_end_mask_0 = const()[name = tensor("op_228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_228_cast_fp16 = slice_by_index(begin = var_228_begin_0, end = var_228_end_0, end_mask = var_228_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_228_cast_fp16")]; tensor var_229_begin_0 = const()[name = tensor("op_229_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_229_end_0 = const()[name = tensor("op_229_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_229_end_mask_0 = const()[name = tensor("op_229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_229_cast_fp16 = slice_by_index(begin = var_229_begin_0, end = var_229_end_0, end_mask = var_229_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_229_cast_fp16")]; tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_230_cast_fp16")]; tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_231_cast_fp16")]; tensor var_232_begin_0 = const()[name = tensor("op_232_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_232_end_0 = const()[name = tensor("op_232_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_232_end_mask_0 = const()[name = tensor("op_232_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_232_cast_fp16 = slice_by_index(begin = var_232_begin_0, end = var_232_end_0, end_mask = var_232_end_mask_0, x = var_194_cast_fp16)[name = tensor("op_232_cast_fp16")]; tensor var_233_begin_0 = const()[name = tensor("op_233_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_233_end_0 = const()[name = tensor("op_233_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_233_end_mask_0 = const()[name = tensor("op_233_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_233_cast_fp16 = slice_by_index(begin = var_233_begin_0, end = var_233_end_0, end_mask = var_233_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_233_cast_fp16")]; tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_234_cast_fp16")]; tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_235_cast_fp16")]; tensor var_236_begin_0 = const()[name = tensor("op_236_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_236_end_0 = const()[name = tensor("op_236_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_236_end_mask_0 = const()[name = tensor("op_236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = var_236_end_0, end_mask = var_236_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_236_cast_fp16")]; tensor var_237_begin_0 = const()[name = tensor("op_237_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_237_end_0 = const()[name = tensor("op_237_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_237_end_mask_0 = const()[name = tensor("op_237_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_237_cast_fp16 = slice_by_index(begin = var_237_begin_0, end = var_237_end_0, end_mask = var_237_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_237_cast_fp16")]; tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = var_198_cast_fp16)[name = tensor("op_238_cast_fp16")]; tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_239_cast_fp16")]; tensor var_240_begin_0 = const()[name = tensor("op_240_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_240_end_0 = const()[name = tensor("op_240_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_240_end_mask_0 = const()[name = tensor("op_240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_240_cast_fp16 = slice_by_index(begin = var_240_begin_0, end = var_240_end_0, end_mask = var_240_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_240_cast_fp16")]; tensor var_241_begin_0 = const()[name = tensor("op_241_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_241_end_0 = const()[name = tensor("op_241_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_241_end_mask_0 = const()[name = tensor("op_241_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_241_cast_fp16 = slice_by_index(begin = var_241_begin_0, end = var_241_end_0, end_mask = var_241_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_241_cast_fp16")]; tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_242_cast_fp16")]; tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_243_cast_fp16")]; tensor var_244_begin_0 = const()[name = tensor("op_244_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_244_end_0 = const()[name = tensor("op_244_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_244_end_mask_0 = const()[name = tensor("op_244_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_244_cast_fp16 = slice_by_index(begin = var_244_begin_0, end = var_244_end_0, end_mask = var_244_end_mask_0, x = var_202_cast_fp16)[name = tensor("op_244_cast_fp16")]; tensor var_245_begin_0 = const()[name = tensor("op_245_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_245_end_0 = const()[name = tensor("op_245_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_245_end_mask_0 = const()[name = tensor("op_245_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_245_cast_fp16 = slice_by_index(begin = var_245_begin_0, end = var_245_end_0, end_mask = var_245_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_245_cast_fp16")]; tensor var_246_begin_0 = const()[name = tensor("op_246_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_246_end_0 = const()[name = tensor("op_246_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_246_end_mask_0 = const()[name = tensor("op_246_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_246_cast_fp16 = slice_by_index(begin = var_246_begin_0, end = var_246_end_0, end_mask = var_246_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_246_cast_fp16")]; tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_247_cast_fp16")]; tensor var_248_begin_0 = const()[name = tensor("op_248_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_248_end_0 = const()[name = tensor("op_248_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_248_end_mask_0 = const()[name = tensor("op_248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_248_cast_fp16 = slice_by_index(begin = var_248_begin_0, end = var_248_end_0, end_mask = var_248_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_248_cast_fp16")]; tensor var_249_begin_0 = const()[name = tensor("op_249_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_249_end_0 = const()[name = tensor("op_249_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_249_end_mask_0 = const()[name = tensor("op_249_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_249_cast_fp16 = slice_by_index(begin = var_249_begin_0, end = var_249_end_0, end_mask = var_249_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_249_cast_fp16")]; tensor var_250_begin_0 = const()[name = tensor("op_250_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_250_end_0 = const()[name = tensor("op_250_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_250_end_mask_0 = const()[name = tensor("op_250_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = var_250_end_0, end_mask = var_250_end_mask_0, x = var_206_cast_fp16)[name = tensor("op_250_cast_fp16")]; tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_251_cast_fp16")]; tensor var_252_begin_0 = const()[name = tensor("op_252_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_252_end_0 = const()[name = tensor("op_252_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_252_end_mask_0 = const()[name = tensor("op_252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_252_cast_fp16 = slice_by_index(begin = var_252_begin_0, end = var_252_end_0, end_mask = var_252_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_252_cast_fp16")]; tensor var_253_begin_0 = const()[name = tensor("op_253_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_253_end_0 = const()[name = tensor("op_253_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_253_end_mask_0 = const()[name = tensor("op_253_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_253_cast_fp16 = slice_by_index(begin = var_253_begin_0, end = var_253_end_0, end_mask = var_253_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_253_cast_fp16")]; tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_254_cast_fp16")]; tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_255_cast_fp16")]; tensor var_256_begin_0 = const()[name = tensor("op_256_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_256_end_0 = const()[name = tensor("op_256_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_256_end_mask_0 = const()[name = tensor("op_256_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_256_cast_fp16 = slice_by_index(begin = var_256_begin_0, end = var_256_end_0, end_mask = var_256_end_mask_0, x = var_210_cast_fp16)[name = tensor("op_256_cast_fp16")]; tensor var_257_begin_0 = const()[name = tensor("op_257_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_257_end_0 = const()[name = tensor("op_257_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_257_end_mask_0 = const()[name = tensor("op_257_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_257_cast_fp16 = slice_by_index(begin = var_257_begin_0, end = var_257_end_0, end_mask = var_257_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_257_cast_fp16")]; tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_258_cast_fp16")]; tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_259_cast_fp16")]; tensor var_260_begin_0 = const()[name = tensor("op_260_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_260_end_0 = const()[name = tensor("op_260_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_260_end_mask_0 = const()[name = tensor("op_260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_260_cast_fp16 = slice_by_index(begin = var_260_begin_0, end = var_260_end_0, end_mask = var_260_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_260_cast_fp16")]; tensor var_261_begin_0 = const()[name = tensor("op_261_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_261_end_0 = const()[name = tensor("op_261_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_261_end_mask_0 = const()[name = tensor("op_261_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_261_cast_fp16 = slice_by_index(begin = var_261_begin_0, end = var_261_end_0, end_mask = var_261_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_261_cast_fp16")]; tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = var_214_cast_fp16)[name = tensor("op_262_cast_fp16")]; tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_263_cast_fp16")]; tensor var_264_begin_0 = const()[name = tensor("op_264_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_264_end_0 = const()[name = tensor("op_264_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_264_end_mask_0 = const()[name = tensor("op_264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_264_cast_fp16 = slice_by_index(begin = var_264_begin_0, end = var_264_end_0, end_mask = var_264_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_264_cast_fp16")]; tensor var_265_begin_0 = const()[name = tensor("op_265_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_265_end_0 = const()[name = tensor("op_265_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_265_end_mask_0 = const()[name = tensor("op_265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_265_cast_fp16 = slice_by_index(begin = var_265_begin_0, end = var_265_end_0, end_mask = var_265_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_265_cast_fp16")]; tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_266_cast_fp16")]; tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_267_cast_fp16")]; tensor var_268_begin_0 = const()[name = tensor("op_268_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_268_end_0 = const()[name = tensor("op_268_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_268_end_mask_0 = const()[name = tensor("op_268_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = var_268_end_0, end_mask = var_268_end_mask_0, x = var_218_cast_fp16)[name = tensor("op_268_cast_fp16")]; tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_273_begin_0 = const()[name = tensor("op_273_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_273_end_0 = const()[name = tensor("op_273_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_273_end_mask_0 = const()[name = tensor("op_273_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_5")]; tensor var_273_cast_fp16 = slice_by_index(begin = var_273_begin_0, end = var_273_end_0, end_mask = var_273_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_273_cast_fp16")]; tensor var_277_begin_0 = const()[name = tensor("op_277_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_277_end_0 = const()[name = tensor("op_277_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_277_end_mask_0 = const()[name = tensor("op_277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_277_cast_fp16 = slice_by_index(begin = var_277_begin_0, end = var_277_end_0, end_mask = var_277_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_277_cast_fp16")]; tensor var_281_begin_0 = const()[name = tensor("op_281_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_281_end_0 = const()[name = tensor("op_281_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_281_end_mask_0 = const()[name = tensor("op_281_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_281_cast_fp16 = slice_by_index(begin = var_281_begin_0, end = var_281_end_0, end_mask = var_281_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_281_cast_fp16")]; tensor var_285_begin_0 = const()[name = tensor("op_285_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_285_end_0 = const()[name = tensor("op_285_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_285_end_mask_0 = const()[name = tensor("op_285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_285_cast_fp16")]; tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_289_cast_fp16")]; tensor var_293_begin_0 = const()[name = tensor("op_293_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_293_end_0 = const()[name = tensor("op_293_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_293_end_mask_0 = const()[name = tensor("op_293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_293_cast_fp16 = slice_by_index(begin = var_293_begin_0, end = var_293_end_0, end_mask = var_293_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_293_cast_fp16")]; tensor var_297_begin_0 = const()[name = tensor("op_297_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_297_end_0 = const()[name = tensor("op_297_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_297_end_mask_0 = const()[name = tensor("op_297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_297_cast_fp16 = slice_by_index(begin = var_297_begin_0, end = var_297_end_0, end_mask = var_297_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_297_cast_fp16")]; tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_301_cast_fp16")]; tensor var_303_begin_0 = const()[name = tensor("op_303_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_303_end_0 = const()[name = tensor("op_303_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_303_end_mask_0 = const()[name = tensor("op_303_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_303_cast_fp16 = slice_by_index(begin = var_303_begin_0, end = var_303_end_0, end_mask = var_303_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_303_cast_fp16")]; tensor var_307_begin_0 = const()[name = tensor("op_307_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_307_end_0 = const()[name = tensor("op_307_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_307_end_mask_0 = const()[name = tensor("op_307_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_307_cast_fp16 = slice_by_index(begin = var_307_begin_0, end = var_307_end_0, end_mask = var_307_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_307_cast_fp16")]; tensor var_311_begin_0 = const()[name = tensor("op_311_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_311_end_0 = const()[name = tensor("op_311_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_311_end_mask_0 = const()[name = tensor("op_311_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_311_cast_fp16")]; tensor var_315_begin_0 = const()[name = tensor("op_315_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_315_end_0 = const()[name = tensor("op_315_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_315_end_mask_0 = const()[name = tensor("op_315_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_315_cast_fp16 = slice_by_index(begin = var_315_begin_0, end = var_315_end_0, end_mask = var_315_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_315_cast_fp16")]; tensor var_319_begin_0 = const()[name = tensor("op_319_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_319_end_0 = const()[name = tensor("op_319_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_319_end_mask_0 = const()[name = tensor("op_319_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_319_cast_fp16 = slice_by_index(begin = var_319_begin_0, end = var_319_end_0, end_mask = var_319_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_319_cast_fp16")]; tensor var_323_begin_0 = const()[name = tensor("op_323_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_323_end_0 = const()[name = tensor("op_323_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_323_end_mask_0 = const()[name = tensor("op_323_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_323_cast_fp16 = slice_by_index(begin = var_323_begin_0, end = var_323_end_0, end_mask = var_323_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_323_cast_fp16")]; tensor var_327_begin_0 = const()[name = tensor("op_327_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_327_end_0 = const()[name = tensor("op_327_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_327_end_mask_0 = const()[name = tensor("op_327_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_327_cast_fp16 = slice_by_index(begin = var_327_begin_0, end = var_327_end_0, end_mask = var_327_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_327_cast_fp16")]; tensor var_331_begin_0 = const()[name = tensor("op_331_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_331_end_0 = const()[name = tensor("op_331_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_331_end_mask_0 = const()[name = tensor("op_331_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_331_cast_fp16 = slice_by_index(begin = var_331_begin_0, end = var_331_end_0, end_mask = var_331_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1_equation_0, values = (var_273_cast_fp16, var_221_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3_equation_0, values = (var_273_cast_fp16, var_222_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5_equation_0, values = (var_273_cast_fp16, var_223_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5_cast_fp16")]; tensor _SplitHeadsQ__mh_w_7_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_7_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_7_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_7_equation_0, values = (var_273_cast_fp16, var_224_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_7_cast_fp16")]; tensor _SplitHeadsQ__mh_w_9_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_9_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_9_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_9_equation_0, values = (var_273_cast_fp16, var_225_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_9_cast_fp16")]; tensor _SplitHeadsQ__mh_w_11_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_11_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_11_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_11_equation_0, values = (var_273_cast_fp16, var_226_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_11_cast_fp16")]; tensor _SplitHeadsQ__mh_w_13_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_13_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_13_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_13_equation_0, values = (var_277_cast_fp16, var_227_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_13_cast_fp16")]; tensor _SplitHeadsQ__mh_w_15_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_15_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_15_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_15_equation_0, values = (var_277_cast_fp16, var_228_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_15_cast_fp16")]; tensor _SplitHeadsQ__mh_w_17_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_17_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_17_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_17_equation_0, values = (var_277_cast_fp16, var_229_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_17_cast_fp16")]; tensor _SplitHeadsQ__mh_w_19_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_19_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_19_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_19_equation_0, values = (var_277_cast_fp16, var_230_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_19_cast_fp16")]; tensor _SplitHeadsQ__mh_w_21_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_21_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_21_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_21_equation_0, values = (var_277_cast_fp16, var_231_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_21_cast_fp16")]; tensor _SplitHeadsQ__mh_w_23_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_23_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_23_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_23_equation_0, values = (var_277_cast_fp16, var_232_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_23_cast_fp16")]; tensor _SplitHeadsQ__mh_w_25_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_25_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_25_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_25_equation_0, values = (var_281_cast_fp16, var_233_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_25_cast_fp16")]; tensor _SplitHeadsQ__mh_w_27_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_27_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_27_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_27_equation_0, values = (var_281_cast_fp16, var_234_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_27_cast_fp16")]; tensor _SplitHeadsQ__mh_w_29_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_29_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_29_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_29_equation_0, values = (var_281_cast_fp16, var_235_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_29_cast_fp16")]; tensor _SplitHeadsQ__mh_w_31_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_31_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_31_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_31_equation_0, values = (var_281_cast_fp16, var_236_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_31_cast_fp16")]; tensor _SplitHeadsQ__mh_w_33_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_33_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_33_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_33_equation_0, values = (var_281_cast_fp16, var_237_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_33_cast_fp16")]; tensor _SplitHeadsQ__mh_w_35_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_35_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_35_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_35_equation_0, values = (var_281_cast_fp16, var_238_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_35_cast_fp16")]; tensor _SplitHeadsQ__mh_w_37_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_37_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_37_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_37_equation_0, values = (var_285_cast_fp16, var_239_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_37_cast_fp16")]; tensor _SplitHeadsQ__mh_w_39_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_39_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_39_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_39_equation_0, values = (var_285_cast_fp16, var_240_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_39_cast_fp16")]; tensor _SplitHeadsQ__mh_w_41_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_41_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_41_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_41_equation_0, values = (var_285_cast_fp16, var_241_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_41_cast_fp16")]; tensor _SplitHeadsQ__mh_w_43_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_43_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_43_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_43_equation_0, values = (var_285_cast_fp16, var_242_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_43_cast_fp16")]; tensor _SplitHeadsQ__mh_w_45_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_45_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_45_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_45_equation_0, values = (var_285_cast_fp16, var_243_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_45_cast_fp16")]; tensor _SplitHeadsQ__mh_w_47_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_47_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_47_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_47_equation_0, values = (var_285_cast_fp16, var_244_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_47_cast_fp16")]; tensor _SplitHeadsQ__mh_w_49_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_49_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_49_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_49_equation_0, values = (var_289_cast_fp16, var_245_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_49_cast_fp16")]; tensor _SplitHeadsQ__mh_w_51_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_51_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_51_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_51_equation_0, values = (var_289_cast_fp16, var_246_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_51_cast_fp16")]; tensor _SplitHeadsQ__mh_w_53_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_53_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_53_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_53_equation_0, values = (var_289_cast_fp16, var_247_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_53_cast_fp16")]; tensor _SplitHeadsQ__mh_w_55_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_55_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_55_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_55_equation_0, values = (var_289_cast_fp16, var_248_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_55_cast_fp16")]; tensor _SplitHeadsQ__mh_w_57_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_57_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_57_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_57_equation_0, values = (var_289_cast_fp16, var_249_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_57_cast_fp16")]; tensor _SplitHeadsQ__mh_w_59_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_59_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_59_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_59_equation_0, values = (var_289_cast_fp16, var_250_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_59_cast_fp16")]; tensor _SplitHeadsQ__mh_w_61_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_61_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_61_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_61_equation_0, values = (var_293_cast_fp16, var_251_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_61_cast_fp16")]; tensor _SplitHeadsQ__mh_w_63_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_63_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_63_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_63_equation_0, values = (var_293_cast_fp16, var_252_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_63_cast_fp16")]; tensor _SplitHeadsQ__mh_w_65_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_65_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_65_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_65_equation_0, values = (var_293_cast_fp16, var_253_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_65_cast_fp16")]; tensor _SplitHeadsQ__mh_w_67_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_67_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_67_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_67_equation_0, values = (var_293_cast_fp16, var_254_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_67_cast_fp16")]; tensor _SplitHeadsQ__mh_w_69_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_69_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_69_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_69_equation_0, values = (var_293_cast_fp16, var_255_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_69_cast_fp16")]; tensor _SplitHeadsQ__mh_w_71_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_71_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_71_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_71_equation_0, values = (var_293_cast_fp16, var_256_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_71_cast_fp16")]; tensor _SplitHeadsQ__mh_w_73_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_73_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_73_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_73_equation_0, values = (var_297_cast_fp16, var_257_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_73_cast_fp16")]; tensor _SplitHeadsQ__mh_w_75_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_75_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_75_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_75_equation_0, values = (var_297_cast_fp16, var_258_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_75_cast_fp16")]; tensor _SplitHeadsQ__mh_w_77_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_77_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_77_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_77_equation_0, values = (var_297_cast_fp16, var_259_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_77_cast_fp16")]; tensor _SplitHeadsQ__mh_w_79_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_79_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_79_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_79_equation_0, values = (var_297_cast_fp16, var_260_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_79_cast_fp16")]; tensor _SplitHeadsQ__mh_w_81_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_81_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_81_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_81_equation_0, values = (var_297_cast_fp16, var_261_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_81_cast_fp16")]; tensor _SplitHeadsQ__mh_w_83_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_83_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_83_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_83_equation_0, values = (var_297_cast_fp16, var_262_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_83_cast_fp16")]; tensor _SplitHeadsQ__mh_w_85_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_85_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_85_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_85_equation_0, values = (var_301_cast_fp16, var_263_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_85_cast_fp16")]; tensor _SplitHeadsQ__mh_w_87_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_87_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_87_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_87_equation_0, values = (var_301_cast_fp16, var_264_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_87_cast_fp16")]; tensor _SplitHeadsQ__mh_w_89_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_89_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_89_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_89_equation_0, values = (var_301_cast_fp16, var_265_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_89_cast_fp16")]; tensor _SplitHeadsQ__mh_w_91_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_91_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_91_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_91_equation_0, values = (var_301_cast_fp16, var_266_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_91_cast_fp16")]; tensor _SplitHeadsQ__mh_w_93_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_93_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_93_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_93_equation_0, values = (var_301_cast_fp16, var_267_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_93_cast_fp16")]; tensor _SplitHeadsQ__mh_w_95_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_95_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_95_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_95_equation_0, values = (var_301_cast_fp16, var_268_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_95_cast_fp16")]; tensor var_430_to_fp16 = const()[name = tensor("op_430_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1_cast_fp16, y = var_430_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; tensor var_432_to_fp16 = const()[name = tensor("op_432_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3_cast_fp16, y = var_432_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; tensor var_434_to_fp16 = const()[name = tensor("op_434_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5_cast_fp16, y = var_434_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; tensor var_436_to_fp16 = const()[name = tensor("op_436_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_7_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_7_cast_fp16, y = var_436_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; tensor var_438_to_fp16 = const()[name = tensor("op_438_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_9_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_9_cast_fp16, y = var_438_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; tensor var_440_to_fp16 = const()[name = tensor("op_440_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_11_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_11_cast_fp16, y = var_440_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; tensor var_442_to_fp16 = const()[name = tensor("op_442_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_13_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_13_cast_fp16, y = var_442_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; tensor var_444_to_fp16 = const()[name = tensor("op_444_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_15_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_15_cast_fp16, y = var_444_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; tensor var_446_to_fp16 = const()[name = tensor("op_446_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_17_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_17_cast_fp16, y = var_446_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; tensor var_448_to_fp16 = const()[name = tensor("op_448_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_19_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_19_cast_fp16, y = var_448_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; tensor var_450_to_fp16 = const()[name = tensor("op_450_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_21_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_21_cast_fp16, y = var_450_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; tensor var_452_to_fp16 = const()[name = tensor("op_452_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_23_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_23_cast_fp16, y = var_452_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; tensor var_454_to_fp16 = const()[name = tensor("op_454_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_25_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_25_cast_fp16, y = var_454_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; tensor var_456_to_fp16 = const()[name = tensor("op_456_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_27_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_27_cast_fp16, y = var_456_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; tensor var_458_to_fp16 = const()[name = tensor("op_458_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_29_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_29_cast_fp16, y = var_458_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; tensor var_460_to_fp16 = const()[name = tensor("op_460_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_31_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_31_cast_fp16, y = var_460_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; tensor var_462_to_fp16 = const()[name = tensor("op_462_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_33_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_33_cast_fp16, y = var_462_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; tensor var_464_to_fp16 = const()[name = tensor("op_464_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_35_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_35_cast_fp16, y = var_464_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; tensor var_466_to_fp16 = const()[name = tensor("op_466_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_37_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_37_cast_fp16, y = var_466_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; tensor var_468_to_fp16 = const()[name = tensor("op_468_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_39_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_39_cast_fp16, y = var_468_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; tensor var_470_to_fp16 = const()[name = tensor("op_470_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_41_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_41_cast_fp16, y = var_470_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; tensor var_472_to_fp16 = const()[name = tensor("op_472_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_43_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_43_cast_fp16, y = var_472_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; tensor var_474_to_fp16 = const()[name = tensor("op_474_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_45_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_45_cast_fp16, y = var_474_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; tensor var_476_to_fp16 = const()[name = tensor("op_476_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_47_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_47_cast_fp16, y = var_476_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; tensor var_478_to_fp16 = const()[name = tensor("op_478_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_49_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_49_cast_fp16, y = var_478_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_51_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_51_cast_fp16, y = var_480_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; tensor var_482_to_fp16 = const()[name = tensor("op_482_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_53_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_53_cast_fp16, y = var_482_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_55_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_55_cast_fp16, y = var_484_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; tensor var_486_to_fp16 = const()[name = tensor("op_486_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_57_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_57_cast_fp16, y = var_486_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_59_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_59_cast_fp16, y = var_488_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; tensor var_490_to_fp16 = const()[name = tensor("op_490_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_61_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_61_cast_fp16, y = var_490_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_63_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_63_cast_fp16, y = var_492_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; tensor var_494_to_fp16 = const()[name = tensor("op_494_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_65_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_65_cast_fp16, y = var_494_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_67_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_67_cast_fp16, y = var_496_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; tensor var_498_to_fp16 = const()[name = tensor("op_498_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_69_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_69_cast_fp16, y = var_498_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_71_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_71_cast_fp16, y = var_500_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; tensor var_502_to_fp16 = const()[name = tensor("op_502_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_73_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_73_cast_fp16, y = var_502_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_75_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_75_cast_fp16, y = var_504_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; tensor var_506_to_fp16 = const()[name = tensor("op_506_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_77_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_77_cast_fp16, y = var_506_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_79_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_79_cast_fp16, y = var_508_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; tensor var_510_to_fp16 = const()[name = tensor("op_510_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_81_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_81_cast_fp16, y = var_510_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_83_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_83_cast_fp16, y = var_512_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; tensor var_514_to_fp16 = const()[name = tensor("op_514_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_85_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_85_cast_fp16, y = var_514_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_87_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_87_cast_fp16, y = var_516_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; tensor var_518_to_fp16 = const()[name = tensor("op_518_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_89_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_89_cast_fp16, y = var_518_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_91_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_91_cast_fp16, y = var_520_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; tensor var_522_to_fp16 = const()[name = tensor("op_522_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_93_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_93_cast_fp16, y = var_522_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_95_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_95_cast_fp16, y = var_524_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; tensor var_526_cast_fp16 = softmax(axis = var_138, x = aw_chunk_1_cast_fp16)[name = tensor("op_526_cast_fp16")]; tensor var_527_cast_fp16 = softmax(axis = var_138, x = aw_chunk_3_cast_fp16)[name = tensor("op_527_cast_fp16")]; tensor var_528_cast_fp16 = softmax(axis = var_138, x = aw_chunk_5_cast_fp16)[name = tensor("op_528_cast_fp16")]; tensor var_529_cast_fp16 = softmax(axis = var_138, x = aw_chunk_7_cast_fp16)[name = tensor("op_529_cast_fp16")]; tensor var_530_cast_fp16 = softmax(axis = var_138, x = aw_chunk_9_cast_fp16)[name = tensor("op_530_cast_fp16")]; tensor var_531_cast_fp16 = softmax(axis = var_138, x = aw_chunk_11_cast_fp16)[name = tensor("op_531_cast_fp16")]; tensor var_532_cast_fp16 = softmax(axis = var_138, x = aw_chunk_13_cast_fp16)[name = tensor("op_532_cast_fp16")]; tensor var_533_cast_fp16 = softmax(axis = var_138, x = aw_chunk_15_cast_fp16)[name = tensor("op_533_cast_fp16")]; tensor var_534_cast_fp16 = softmax(axis = var_138, x = aw_chunk_17_cast_fp16)[name = tensor("op_534_cast_fp16")]; tensor var_535_cast_fp16 = softmax(axis = var_138, x = aw_chunk_19_cast_fp16)[name = tensor("op_535_cast_fp16")]; tensor var_536_cast_fp16 = softmax(axis = var_138, x = aw_chunk_21_cast_fp16)[name = tensor("op_536_cast_fp16")]; tensor var_537_cast_fp16 = softmax(axis = var_138, x = aw_chunk_23_cast_fp16)[name = tensor("op_537_cast_fp16")]; tensor var_538_cast_fp16 = softmax(axis = var_138, x = aw_chunk_25_cast_fp16)[name = tensor("op_538_cast_fp16")]; tensor var_539_cast_fp16 = softmax(axis = var_138, x = aw_chunk_27_cast_fp16)[name = tensor("op_539_cast_fp16")]; tensor var_540_cast_fp16 = softmax(axis = var_138, x = aw_chunk_29_cast_fp16)[name = tensor("op_540_cast_fp16")]; tensor var_541_cast_fp16 = softmax(axis = var_138, x = aw_chunk_31_cast_fp16)[name = tensor("op_541_cast_fp16")]; tensor var_542_cast_fp16 = softmax(axis = var_138, x = aw_chunk_33_cast_fp16)[name = tensor("op_542_cast_fp16")]; tensor var_543_cast_fp16 = softmax(axis = var_138, x = aw_chunk_35_cast_fp16)[name = tensor("op_543_cast_fp16")]; tensor var_544_cast_fp16 = softmax(axis = var_138, x = aw_chunk_37_cast_fp16)[name = tensor("op_544_cast_fp16")]; tensor var_545_cast_fp16 = softmax(axis = var_138, x = aw_chunk_39_cast_fp16)[name = tensor("op_545_cast_fp16")]; tensor var_546_cast_fp16 = softmax(axis = var_138, x = aw_chunk_41_cast_fp16)[name = tensor("op_546_cast_fp16")]; tensor var_547_cast_fp16 = softmax(axis = var_138, x = aw_chunk_43_cast_fp16)[name = tensor("op_547_cast_fp16")]; tensor var_548_cast_fp16 = softmax(axis = var_138, x = aw_chunk_45_cast_fp16)[name = tensor("op_548_cast_fp16")]; tensor var_549_cast_fp16 = softmax(axis = var_138, x = aw_chunk_47_cast_fp16)[name = tensor("op_549_cast_fp16")]; tensor var_550_cast_fp16 = softmax(axis = var_138, x = aw_chunk_49_cast_fp16)[name = tensor("op_550_cast_fp16")]; tensor var_551_cast_fp16 = softmax(axis = var_138, x = aw_chunk_51_cast_fp16)[name = tensor("op_551_cast_fp16")]; tensor var_552_cast_fp16 = softmax(axis = var_138, x = aw_chunk_53_cast_fp16)[name = tensor("op_552_cast_fp16")]; tensor var_553_cast_fp16 = softmax(axis = var_138, x = aw_chunk_55_cast_fp16)[name = tensor("op_553_cast_fp16")]; tensor var_554_cast_fp16 = softmax(axis = var_138, x = aw_chunk_57_cast_fp16)[name = tensor("op_554_cast_fp16")]; tensor var_555_cast_fp16 = softmax(axis = var_138, x = aw_chunk_59_cast_fp16)[name = tensor("op_555_cast_fp16")]; tensor var_556_cast_fp16 = softmax(axis = var_138, x = aw_chunk_61_cast_fp16)[name = tensor("op_556_cast_fp16")]; tensor var_557_cast_fp16 = softmax(axis = var_138, x = aw_chunk_63_cast_fp16)[name = tensor("op_557_cast_fp16")]; tensor var_558_cast_fp16 = softmax(axis = var_138, x = aw_chunk_65_cast_fp16)[name = tensor("op_558_cast_fp16")]; tensor var_559_cast_fp16 = softmax(axis = var_138, x = aw_chunk_67_cast_fp16)[name = tensor("op_559_cast_fp16")]; tensor var_560_cast_fp16 = softmax(axis = var_138, x = aw_chunk_69_cast_fp16)[name = tensor("op_560_cast_fp16")]; tensor var_561_cast_fp16 = softmax(axis = var_138, x = aw_chunk_71_cast_fp16)[name = tensor("op_561_cast_fp16")]; tensor var_562_cast_fp16 = softmax(axis = var_138, x = aw_chunk_73_cast_fp16)[name = tensor("op_562_cast_fp16")]; tensor var_563_cast_fp16 = softmax(axis = var_138, x = aw_chunk_75_cast_fp16)[name = tensor("op_563_cast_fp16")]; tensor var_564_cast_fp16 = softmax(axis = var_138, x = aw_chunk_77_cast_fp16)[name = tensor("op_564_cast_fp16")]; tensor var_565_cast_fp16 = softmax(axis = var_138, x = aw_chunk_79_cast_fp16)[name = tensor("op_565_cast_fp16")]; tensor var_566_cast_fp16 = softmax(axis = var_138, x = aw_chunk_81_cast_fp16)[name = tensor("op_566_cast_fp16")]; tensor var_567_cast_fp16 = softmax(axis = var_138, x = aw_chunk_83_cast_fp16)[name = tensor("op_567_cast_fp16")]; tensor var_568_cast_fp16 = softmax(axis = var_138, x = aw_chunk_85_cast_fp16)[name = tensor("op_568_cast_fp16")]; tensor var_569_cast_fp16 = softmax(axis = var_138, x = aw_chunk_87_cast_fp16)[name = tensor("op_569_cast_fp16")]; tensor var_570_cast_fp16 = softmax(axis = var_138, x = aw_chunk_89_cast_fp16)[name = tensor("op_570_cast_fp16")]; tensor var_571_cast_fp16 = softmax(axis = var_138, x = aw_chunk_91_cast_fp16)[name = tensor("op_571_cast_fp16")]; tensor var_572_cast_fp16 = softmax(axis = var_138, x = aw_chunk_93_cast_fp16)[name = tensor("op_572_cast_fp16")]; tensor var_573_cast_fp16 = softmax(axis = var_138, x = aw_chunk_95_cast_fp16)[name = tensor("op_573_cast_fp16")]; tensor var_575_equation_0 = const()[name = tensor("op_575_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_575_cast_fp16 = einsum(equation = var_575_equation_0, values = (var_303_cast_fp16, var_526_cast_fp16))[name = tensor("op_575_cast_fp16")]; tensor var_577_equation_0 = const()[name = tensor("op_577_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_577_cast_fp16 = einsum(equation = var_577_equation_0, values = (var_303_cast_fp16, var_527_cast_fp16))[name = tensor("op_577_cast_fp16")]; tensor var_579_equation_0 = const()[name = tensor("op_579_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_579_cast_fp16 = einsum(equation = var_579_equation_0, values = (var_303_cast_fp16, var_528_cast_fp16))[name = tensor("op_579_cast_fp16")]; tensor var_581_equation_0 = const()[name = tensor("op_581_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_581_cast_fp16 = einsum(equation = var_581_equation_0, values = (var_303_cast_fp16, var_529_cast_fp16))[name = tensor("op_581_cast_fp16")]; tensor var_583_equation_0 = const()[name = tensor("op_583_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_583_cast_fp16 = einsum(equation = var_583_equation_0, values = (var_303_cast_fp16, var_530_cast_fp16))[name = tensor("op_583_cast_fp16")]; tensor var_585_equation_0 = const()[name = tensor("op_585_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_585_cast_fp16 = einsum(equation = var_585_equation_0, values = (var_303_cast_fp16, var_531_cast_fp16))[name = tensor("op_585_cast_fp16")]; tensor var_587_equation_0 = const()[name = tensor("op_587_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_587_cast_fp16 = einsum(equation = var_587_equation_0, values = (var_307_cast_fp16, var_532_cast_fp16))[name = tensor("op_587_cast_fp16")]; tensor var_589_equation_0 = const()[name = tensor("op_589_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_589_cast_fp16 = einsum(equation = var_589_equation_0, values = (var_307_cast_fp16, var_533_cast_fp16))[name = tensor("op_589_cast_fp16")]; tensor var_591_equation_0 = const()[name = tensor("op_591_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_591_cast_fp16 = einsum(equation = var_591_equation_0, values = (var_307_cast_fp16, var_534_cast_fp16))[name = tensor("op_591_cast_fp16")]; tensor var_593_equation_0 = const()[name = tensor("op_593_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_593_cast_fp16 = einsum(equation = var_593_equation_0, values = (var_307_cast_fp16, var_535_cast_fp16))[name = tensor("op_593_cast_fp16")]; tensor var_595_equation_0 = const()[name = tensor("op_595_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_595_cast_fp16 = einsum(equation = var_595_equation_0, values = (var_307_cast_fp16, var_536_cast_fp16))[name = tensor("op_595_cast_fp16")]; tensor var_597_equation_0 = const()[name = tensor("op_597_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_597_cast_fp16 = einsum(equation = var_597_equation_0, values = (var_307_cast_fp16, var_537_cast_fp16))[name = tensor("op_597_cast_fp16")]; tensor var_599_equation_0 = const()[name = tensor("op_599_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_599_cast_fp16 = einsum(equation = var_599_equation_0, values = (var_311_cast_fp16, var_538_cast_fp16))[name = tensor("op_599_cast_fp16")]; tensor var_601_equation_0 = const()[name = tensor("op_601_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_601_cast_fp16 = einsum(equation = var_601_equation_0, values = (var_311_cast_fp16, var_539_cast_fp16))[name = tensor("op_601_cast_fp16")]; tensor var_603_equation_0 = const()[name = tensor("op_603_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_603_cast_fp16 = einsum(equation = var_603_equation_0, values = (var_311_cast_fp16, var_540_cast_fp16))[name = tensor("op_603_cast_fp16")]; tensor var_605_equation_0 = const()[name = tensor("op_605_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_605_cast_fp16 = einsum(equation = var_605_equation_0, values = (var_311_cast_fp16, var_541_cast_fp16))[name = tensor("op_605_cast_fp16")]; tensor var_607_equation_0 = const()[name = tensor("op_607_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_607_cast_fp16 = einsum(equation = var_607_equation_0, values = (var_311_cast_fp16, var_542_cast_fp16))[name = tensor("op_607_cast_fp16")]; tensor var_609_equation_0 = const()[name = tensor("op_609_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_609_cast_fp16 = einsum(equation = var_609_equation_0, values = (var_311_cast_fp16, var_543_cast_fp16))[name = tensor("op_609_cast_fp16")]; tensor var_611_equation_0 = const()[name = tensor("op_611_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_611_cast_fp16 = einsum(equation = var_611_equation_0, values = (var_315_cast_fp16, var_544_cast_fp16))[name = tensor("op_611_cast_fp16")]; tensor var_613_equation_0 = const()[name = tensor("op_613_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_613_cast_fp16 = einsum(equation = var_613_equation_0, values = (var_315_cast_fp16, var_545_cast_fp16))[name = tensor("op_613_cast_fp16")]; tensor var_615_equation_0 = const()[name = tensor("op_615_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_615_cast_fp16 = einsum(equation = var_615_equation_0, values = (var_315_cast_fp16, var_546_cast_fp16))[name = tensor("op_615_cast_fp16")]; tensor var_617_equation_0 = const()[name = tensor("op_617_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_617_cast_fp16 = einsum(equation = var_617_equation_0, values = (var_315_cast_fp16, var_547_cast_fp16))[name = tensor("op_617_cast_fp16")]; tensor var_619_equation_0 = const()[name = tensor("op_619_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_619_cast_fp16 = einsum(equation = var_619_equation_0, values = (var_315_cast_fp16, var_548_cast_fp16))[name = tensor("op_619_cast_fp16")]; tensor var_621_equation_0 = const()[name = tensor("op_621_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_621_cast_fp16 = einsum(equation = var_621_equation_0, values = (var_315_cast_fp16, var_549_cast_fp16))[name = tensor("op_621_cast_fp16")]; tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_319_cast_fp16, var_550_cast_fp16))[name = tensor("op_623_cast_fp16")]; tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_319_cast_fp16, var_551_cast_fp16))[name = tensor("op_625_cast_fp16")]; tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_319_cast_fp16, var_552_cast_fp16))[name = tensor("op_627_cast_fp16")]; tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_319_cast_fp16, var_553_cast_fp16))[name = tensor("op_629_cast_fp16")]; tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_319_cast_fp16, var_554_cast_fp16))[name = tensor("op_631_cast_fp16")]; tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_319_cast_fp16, var_555_cast_fp16))[name = tensor("op_633_cast_fp16")]; tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_323_cast_fp16, var_556_cast_fp16))[name = tensor("op_635_cast_fp16")]; tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_323_cast_fp16, var_557_cast_fp16))[name = tensor("op_637_cast_fp16")]; tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_323_cast_fp16, var_558_cast_fp16))[name = tensor("op_639_cast_fp16")]; tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_323_cast_fp16, var_559_cast_fp16))[name = tensor("op_641_cast_fp16")]; tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_323_cast_fp16, var_560_cast_fp16))[name = tensor("op_643_cast_fp16")]; tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_323_cast_fp16, var_561_cast_fp16))[name = tensor("op_645_cast_fp16")]; tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_327_cast_fp16, var_562_cast_fp16))[name = tensor("op_647_cast_fp16")]; tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_327_cast_fp16, var_563_cast_fp16))[name = tensor("op_649_cast_fp16")]; tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_327_cast_fp16, var_564_cast_fp16))[name = tensor("op_651_cast_fp16")]; tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_327_cast_fp16, var_565_cast_fp16))[name = tensor("op_653_cast_fp16")]; tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_327_cast_fp16, var_566_cast_fp16))[name = tensor("op_655_cast_fp16")]; tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_327_cast_fp16, var_567_cast_fp16))[name = tensor("op_657_cast_fp16")]; tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_331_cast_fp16, var_568_cast_fp16))[name = tensor("op_659_cast_fp16")]; tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_331_cast_fp16, var_569_cast_fp16))[name = tensor("op_661_cast_fp16")]; tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_331_cast_fp16, var_570_cast_fp16))[name = tensor("op_663_cast_fp16")]; tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_331_cast_fp16, var_571_cast_fp16))[name = tensor("op_665_cast_fp16")]; tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_331_cast_fp16, var_572_cast_fp16))[name = tensor("op_667_cast_fp16")]; tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_331_cast_fp16, var_573_cast_fp16))[name = tensor("op_669_cast_fp16")]; tensor var_671_interleave_0 = const()[name = tensor("op_671_interleave_0"), val = tensor(false)]; tensor var_671_cast_fp16 = concat(axis = var_125, interleave = var_671_interleave_0, values = (var_575_cast_fp16, var_577_cast_fp16, var_579_cast_fp16, var_581_cast_fp16, var_583_cast_fp16, var_585_cast_fp16))[name = tensor("op_671_cast_fp16")]; tensor var_673_interleave_0 = const()[name = tensor("op_673_interleave_0"), val = tensor(false)]; tensor var_673_cast_fp16 = concat(axis = var_125, interleave = var_673_interleave_0, values = (var_587_cast_fp16, var_589_cast_fp16, var_591_cast_fp16, var_593_cast_fp16, var_595_cast_fp16, var_597_cast_fp16))[name = tensor("op_673_cast_fp16")]; tensor var_675_interleave_0 = const()[name = tensor("op_675_interleave_0"), val = tensor(false)]; tensor var_675_cast_fp16 = concat(axis = var_125, interleave = var_675_interleave_0, values = (var_599_cast_fp16, var_601_cast_fp16, var_603_cast_fp16, var_605_cast_fp16, var_607_cast_fp16, var_609_cast_fp16))[name = tensor("op_675_cast_fp16")]; tensor var_677_interleave_0 = const()[name = tensor("op_677_interleave_0"), val = tensor(false)]; tensor var_677_cast_fp16 = concat(axis = var_125, interleave = var_677_interleave_0, values = (var_611_cast_fp16, var_613_cast_fp16, var_615_cast_fp16, var_617_cast_fp16, var_619_cast_fp16, var_621_cast_fp16))[name = tensor("op_677_cast_fp16")]; tensor var_679_interleave_0 = const()[name = tensor("op_679_interleave_0"), val = tensor(false)]; tensor var_679_cast_fp16 = concat(axis = var_125, interleave = var_679_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16))[name = tensor("op_679_cast_fp16")]; tensor var_681_interleave_0 = const()[name = tensor("op_681_interleave_0"), val = tensor(false)]; tensor var_681_cast_fp16 = concat(axis = var_125, interleave = var_681_interleave_0, values = (var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16))[name = tensor("op_681_cast_fp16")]; tensor var_683_interleave_0 = const()[name = tensor("op_683_interleave_0"), val = tensor(false)]; tensor var_683_cast_fp16 = concat(axis = var_125, interleave = var_683_interleave_0, values = (var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16))[name = tensor("op_683_cast_fp16")]; tensor var_685_interleave_0 = const()[name = tensor("op_685_interleave_0"), val = tensor(false)]; tensor var_685_cast_fp16 = concat(axis = var_125, interleave = var_685_interleave_0, values = (var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("op_685_cast_fp16")]; tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; tensor input_1_cast_fp16 = concat(axis = var_138, interleave = input_1_interleave_0, values = (var_671_cast_fp16, var_673_cast_fp16, var_675_cast_fp16, var_677_cast_fp16, var_679_cast_fp16, var_681_cast_fp16, var_683_cast_fp16, var_685_cast_fp16))[name = tensor("input_1_cast_fp16")]; tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("valid")]; tensor obj_3_strides_0 = const()[name = tensor("obj_3_strides_0"), val = tensor([1, 1])]; tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_3_dilations_0 = const()[name = tensor("obj_3_dilations_0"), val = tensor([1, 1])]; tensor obj_3_groups_0 = const()[name = tensor("obj_3_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4936640)))]; tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5460992)))]; tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_3_dilations_0, groups = obj_3_groups_0, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = obj_3_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; tensor var_704_to_fp16 = const()[name = tensor("op_704_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_704_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5462080)))]; tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5463168)))]; tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("valid")]; tensor input_5_strides_0 = const()[name = tensor("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = tensor("input_5_dilations_0"), val = tensor([1, 1])]; tensor input_5_groups_0 = const()[name = tensor("input_5_groups_0"), val = tensor(1)]; tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5464256)))]; tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7561472)))]; tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("valid")]; tensor hidden_states_5_strides_0 = const()[name = tensor("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = tensor("hidden_states_5_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_5_groups_0 = const()[name = tensor("hidden_states_5_groups_0"), val = tensor(1)]; tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7565632)))]; tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9662848)))]; tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; tensor var_736 = const()[name = tensor("op_736"), val = tensor(3)]; tensor var_749 = const()[name = tensor("op_749"), val = tensor(1)]; tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; tensor var_766_to_fp16 = const()[name = tensor("op_766_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_766_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9663936)))]; tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9665024)))]; tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("valid")]; tensor query_3_strides_0 = const()[name = tensor("query_3_strides_0"), val = tensor([1, 1])]; tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_3_dilations_0 = const()[name = tensor("query_3_dilations_0"), val = tensor([1, 1])]; tensor query_3_groups_0 = const()[name = tensor("query_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9666112)))]; tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10190464)))]; tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("valid")]; tensor key_3_strides_0 = const()[name = tensor("key_3_strides_0"), val = tensor([1, 1])]; tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_3_dilations_0 = const()[name = tensor("key_3_dilations_0"), val = tensor([1, 1])]; tensor key_3_groups_0 = const()[name = tensor("key_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10191552)))]; tensor key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("valid")]; tensor value_3_strides_0 = const()[name = tensor("value_3_strides_0"), val = tensor([1, 1])]; tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_3_dilations_0 = const()[name = tensor("value_3_dilations_0"), val = tensor([1, 1])]; tensor value_3_groups_0 = const()[name = tensor("value_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10715904)))]; tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11240256)))]; tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; tensor var_801_begin_0 = const()[name = tensor("op_801_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_801_end_0 = const()[name = tensor("op_801_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_801_end_mask_0 = const()[name = tensor("op_801_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_801_cast_fp16 = slice_by_index(begin = var_801_begin_0, end = var_801_end_0, end_mask = var_801_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_801_cast_fp16")]; tensor var_805_begin_0 = const()[name = tensor("op_805_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_805_end_0 = const()[name = tensor("op_805_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_805_end_mask_0 = const()[name = tensor("op_805_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_805_cast_fp16 = slice_by_index(begin = var_805_begin_0, end = var_805_end_0, end_mask = var_805_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_805_cast_fp16")]; tensor var_809_begin_0 = const()[name = tensor("op_809_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_809_end_0 = const()[name = tensor("op_809_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_809_end_mask_0 = const()[name = tensor("op_809_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_809_cast_fp16 = slice_by_index(begin = var_809_begin_0, end = var_809_end_0, end_mask = var_809_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_809_cast_fp16")]; tensor var_813_begin_0 = const()[name = tensor("op_813_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_813_end_0 = const()[name = tensor("op_813_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_813_end_mask_0 = const()[name = tensor("op_813_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_813_cast_fp16 = slice_by_index(begin = var_813_begin_0, end = var_813_end_0, end_mask = var_813_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_813_cast_fp16")]; tensor var_817_begin_0 = const()[name = tensor("op_817_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_817_end_0 = const()[name = tensor("op_817_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_817_end_mask_0 = const()[name = tensor("op_817_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_817_cast_fp16 = slice_by_index(begin = var_817_begin_0, end = var_817_end_0, end_mask = var_817_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_817_cast_fp16")]; tensor var_821_begin_0 = const()[name = tensor("op_821_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_821_end_0 = const()[name = tensor("op_821_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_821_end_mask_0 = const()[name = tensor("op_821_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_821_cast_fp16 = slice_by_index(begin = var_821_begin_0, end = var_821_end_0, end_mask = var_821_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_821_cast_fp16")]; tensor var_825_begin_0 = const()[name = tensor("op_825_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_825_end_0 = const()[name = tensor("op_825_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_825_end_mask_0 = const()[name = tensor("op_825_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_825_cast_fp16 = slice_by_index(begin = var_825_begin_0, end = var_825_end_0, end_mask = var_825_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_825_cast_fp16")]; tensor var_829_begin_0 = const()[name = tensor("op_829_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_829_end_0 = const()[name = tensor("op_829_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_829_end_mask_0 = const()[name = tensor("op_829_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_829_cast_fp16 = slice_by_index(begin = var_829_begin_0, end = var_829_end_0, end_mask = var_829_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_829_cast_fp16")]; tensor var_832_begin_0 = const()[name = tensor("op_832_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_832_end_0 = const()[name = tensor("op_832_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_832_end_mask_0 = const()[name = tensor("op_832_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_832_cast_fp16 = slice_by_index(begin = var_832_begin_0, end = var_832_end_0, end_mask = var_832_end_mask_0, x = var_801_cast_fp16)[name = tensor("op_832_cast_fp16")]; tensor var_833_begin_0 = const()[name = tensor("op_833_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_833_end_0 = const()[name = tensor("op_833_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_833_end_mask_0 = const()[name = tensor("op_833_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_833_cast_fp16 = slice_by_index(begin = var_833_begin_0, end = var_833_end_0, end_mask = var_833_end_mask_0, x = var_801_cast_fp16)[name = tensor("op_833_cast_fp16")]; tensor var_834_begin_0 = const()[name = tensor("op_834_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_834_end_0 = const()[name = tensor("op_834_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_834_end_mask_0 = const()[name = tensor("op_834_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_834_cast_fp16 = slice_by_index(begin = var_834_begin_0, end = var_834_end_0, end_mask = var_834_end_mask_0, x = var_801_cast_fp16)[name = tensor("op_834_cast_fp16")]; tensor var_835_begin_0 = const()[name = tensor("op_835_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_835_end_0 = const()[name = tensor("op_835_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_835_end_mask_0 = const()[name = tensor("op_835_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_835_cast_fp16 = slice_by_index(begin = var_835_begin_0, end = var_835_end_0, end_mask = var_835_end_mask_0, x = var_801_cast_fp16)[name = tensor("op_835_cast_fp16")]; tensor var_836_begin_0 = const()[name = tensor("op_836_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_836_end_0 = const()[name = tensor("op_836_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_836_end_mask_0 = const()[name = tensor("op_836_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_836_cast_fp16 = slice_by_index(begin = var_836_begin_0, end = var_836_end_0, end_mask = var_836_end_mask_0, x = var_801_cast_fp16)[name = tensor("op_836_cast_fp16")]; tensor var_837_begin_0 = const()[name = tensor("op_837_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_837_end_0 = const()[name = tensor("op_837_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_837_end_mask_0 = const()[name = tensor("op_837_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_837_cast_fp16 = slice_by_index(begin = var_837_begin_0, end = var_837_end_0, end_mask = var_837_end_mask_0, x = var_801_cast_fp16)[name = tensor("op_837_cast_fp16")]; tensor var_838_begin_0 = const()[name = tensor("op_838_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_838_end_0 = const()[name = tensor("op_838_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_838_end_mask_0 = const()[name = tensor("op_838_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_838_cast_fp16 = slice_by_index(begin = var_838_begin_0, end = var_838_end_0, end_mask = var_838_end_mask_0, x = var_805_cast_fp16)[name = tensor("op_838_cast_fp16")]; tensor var_839_begin_0 = const()[name = tensor("op_839_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_839_end_0 = const()[name = tensor("op_839_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_839_end_mask_0 = const()[name = tensor("op_839_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_839_cast_fp16 = slice_by_index(begin = var_839_begin_0, end = var_839_end_0, end_mask = var_839_end_mask_0, x = var_805_cast_fp16)[name = tensor("op_839_cast_fp16")]; tensor var_840_begin_0 = const()[name = tensor("op_840_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_840_end_0 = const()[name = tensor("op_840_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_840_end_mask_0 = const()[name = tensor("op_840_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_840_cast_fp16 = slice_by_index(begin = var_840_begin_0, end = var_840_end_0, end_mask = var_840_end_mask_0, x = var_805_cast_fp16)[name = tensor("op_840_cast_fp16")]; tensor var_841_begin_0 = const()[name = tensor("op_841_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_841_end_0 = const()[name = tensor("op_841_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_841_end_mask_0 = const()[name = tensor("op_841_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_841_cast_fp16 = slice_by_index(begin = var_841_begin_0, end = var_841_end_0, end_mask = var_841_end_mask_0, x = var_805_cast_fp16)[name = tensor("op_841_cast_fp16")]; tensor var_842_begin_0 = const()[name = tensor("op_842_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_842_end_0 = const()[name = tensor("op_842_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_842_end_mask_0 = const()[name = tensor("op_842_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_842_cast_fp16 = slice_by_index(begin = var_842_begin_0, end = var_842_end_0, end_mask = var_842_end_mask_0, x = var_805_cast_fp16)[name = tensor("op_842_cast_fp16")]; tensor var_843_begin_0 = const()[name = tensor("op_843_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_843_end_0 = const()[name = tensor("op_843_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_843_end_mask_0 = const()[name = tensor("op_843_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_843_cast_fp16 = slice_by_index(begin = var_843_begin_0, end = var_843_end_0, end_mask = var_843_end_mask_0, x = var_805_cast_fp16)[name = tensor("op_843_cast_fp16")]; tensor var_844_begin_0 = const()[name = tensor("op_844_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_844_end_0 = const()[name = tensor("op_844_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_844_end_mask_0 = const()[name = tensor("op_844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_844_cast_fp16 = slice_by_index(begin = var_844_begin_0, end = var_844_end_0, end_mask = var_844_end_mask_0, x = var_809_cast_fp16)[name = tensor("op_844_cast_fp16")]; tensor var_845_begin_0 = const()[name = tensor("op_845_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_845_end_0 = const()[name = tensor("op_845_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_845_end_mask_0 = const()[name = tensor("op_845_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_845_cast_fp16 = slice_by_index(begin = var_845_begin_0, end = var_845_end_0, end_mask = var_845_end_mask_0, x = var_809_cast_fp16)[name = tensor("op_845_cast_fp16")]; tensor var_846_begin_0 = const()[name = tensor("op_846_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_846_end_0 = const()[name = tensor("op_846_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_846_end_mask_0 = const()[name = tensor("op_846_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_846_cast_fp16 = slice_by_index(begin = var_846_begin_0, end = var_846_end_0, end_mask = var_846_end_mask_0, x = var_809_cast_fp16)[name = tensor("op_846_cast_fp16")]; tensor var_847_begin_0 = const()[name = tensor("op_847_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_847_end_0 = const()[name = tensor("op_847_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_847_end_mask_0 = const()[name = tensor("op_847_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_847_cast_fp16 = slice_by_index(begin = var_847_begin_0, end = var_847_end_0, end_mask = var_847_end_mask_0, x = var_809_cast_fp16)[name = tensor("op_847_cast_fp16")]; tensor var_848_begin_0 = const()[name = tensor("op_848_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_848_end_0 = const()[name = tensor("op_848_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_848_end_mask_0 = const()[name = tensor("op_848_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_848_cast_fp16 = slice_by_index(begin = var_848_begin_0, end = var_848_end_0, end_mask = var_848_end_mask_0, x = var_809_cast_fp16)[name = tensor("op_848_cast_fp16")]; tensor var_849_begin_0 = const()[name = tensor("op_849_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_849_end_0 = const()[name = tensor("op_849_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_849_end_mask_0 = const()[name = tensor("op_849_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_849_cast_fp16 = slice_by_index(begin = var_849_begin_0, end = var_849_end_0, end_mask = var_849_end_mask_0, x = var_809_cast_fp16)[name = tensor("op_849_cast_fp16")]; tensor var_850_begin_0 = const()[name = tensor("op_850_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_850_end_0 = const()[name = tensor("op_850_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_850_end_mask_0 = const()[name = tensor("op_850_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_850_cast_fp16 = slice_by_index(begin = var_850_begin_0, end = var_850_end_0, end_mask = var_850_end_mask_0, x = var_813_cast_fp16)[name = tensor("op_850_cast_fp16")]; tensor var_851_begin_0 = const()[name = tensor("op_851_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_851_end_0 = const()[name = tensor("op_851_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_851_end_mask_0 = const()[name = tensor("op_851_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_851_cast_fp16 = slice_by_index(begin = var_851_begin_0, end = var_851_end_0, end_mask = var_851_end_mask_0, x = var_813_cast_fp16)[name = tensor("op_851_cast_fp16")]; tensor var_852_begin_0 = const()[name = tensor("op_852_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_852_end_0 = const()[name = tensor("op_852_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_852_end_mask_0 = const()[name = tensor("op_852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_852_cast_fp16 = slice_by_index(begin = var_852_begin_0, end = var_852_end_0, end_mask = var_852_end_mask_0, x = var_813_cast_fp16)[name = tensor("op_852_cast_fp16")]; tensor var_853_begin_0 = const()[name = tensor("op_853_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_853_end_0 = const()[name = tensor("op_853_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_853_end_mask_0 = const()[name = tensor("op_853_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_853_cast_fp16 = slice_by_index(begin = var_853_begin_0, end = var_853_end_0, end_mask = var_853_end_mask_0, x = var_813_cast_fp16)[name = tensor("op_853_cast_fp16")]; tensor var_854_begin_0 = const()[name = tensor("op_854_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_854_end_0 = const()[name = tensor("op_854_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_854_end_mask_0 = const()[name = tensor("op_854_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_854_cast_fp16 = slice_by_index(begin = var_854_begin_0, end = var_854_end_0, end_mask = var_854_end_mask_0, x = var_813_cast_fp16)[name = tensor("op_854_cast_fp16")]; tensor var_855_begin_0 = const()[name = tensor("op_855_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_855_end_0 = const()[name = tensor("op_855_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_855_end_mask_0 = const()[name = tensor("op_855_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_855_cast_fp16 = slice_by_index(begin = var_855_begin_0, end = var_855_end_0, end_mask = var_855_end_mask_0, x = var_813_cast_fp16)[name = tensor("op_855_cast_fp16")]; tensor var_856_begin_0 = const()[name = tensor("op_856_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_856_end_0 = const()[name = tensor("op_856_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_856_end_mask_0 = const()[name = tensor("op_856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_856_cast_fp16 = slice_by_index(begin = var_856_begin_0, end = var_856_end_0, end_mask = var_856_end_mask_0, x = var_817_cast_fp16)[name = tensor("op_856_cast_fp16")]; tensor var_857_begin_0 = const()[name = tensor("op_857_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_857_end_0 = const()[name = tensor("op_857_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_857_end_mask_0 = const()[name = tensor("op_857_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_857_cast_fp16 = slice_by_index(begin = var_857_begin_0, end = var_857_end_0, end_mask = var_857_end_mask_0, x = var_817_cast_fp16)[name = tensor("op_857_cast_fp16")]; tensor var_858_begin_0 = const()[name = tensor("op_858_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_858_end_0 = const()[name = tensor("op_858_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_858_end_mask_0 = const()[name = tensor("op_858_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_858_cast_fp16 = slice_by_index(begin = var_858_begin_0, end = var_858_end_0, end_mask = var_858_end_mask_0, x = var_817_cast_fp16)[name = tensor("op_858_cast_fp16")]; tensor var_859_begin_0 = const()[name = tensor("op_859_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_859_end_0 = const()[name = tensor("op_859_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_859_end_mask_0 = const()[name = tensor("op_859_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_859_cast_fp16 = slice_by_index(begin = var_859_begin_0, end = var_859_end_0, end_mask = var_859_end_mask_0, x = var_817_cast_fp16)[name = tensor("op_859_cast_fp16")]; tensor var_860_begin_0 = const()[name = tensor("op_860_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_860_end_0 = const()[name = tensor("op_860_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_860_end_mask_0 = const()[name = tensor("op_860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_860_cast_fp16 = slice_by_index(begin = var_860_begin_0, end = var_860_end_0, end_mask = var_860_end_mask_0, x = var_817_cast_fp16)[name = tensor("op_860_cast_fp16")]; tensor var_861_begin_0 = const()[name = tensor("op_861_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_861_end_0 = const()[name = tensor("op_861_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_861_end_mask_0 = const()[name = tensor("op_861_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_861_cast_fp16 = slice_by_index(begin = var_861_begin_0, end = var_861_end_0, end_mask = var_861_end_mask_0, x = var_817_cast_fp16)[name = tensor("op_861_cast_fp16")]; tensor var_862_begin_0 = const()[name = tensor("op_862_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_862_end_0 = const()[name = tensor("op_862_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_862_end_mask_0 = const()[name = tensor("op_862_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_862_cast_fp16 = slice_by_index(begin = var_862_begin_0, end = var_862_end_0, end_mask = var_862_end_mask_0, x = var_821_cast_fp16)[name = tensor("op_862_cast_fp16")]; tensor var_863_begin_0 = const()[name = tensor("op_863_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_863_end_0 = const()[name = tensor("op_863_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_863_end_mask_0 = const()[name = tensor("op_863_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_863_cast_fp16 = slice_by_index(begin = var_863_begin_0, end = var_863_end_0, end_mask = var_863_end_mask_0, x = var_821_cast_fp16)[name = tensor("op_863_cast_fp16")]; tensor var_864_begin_0 = const()[name = tensor("op_864_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_864_end_0 = const()[name = tensor("op_864_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_864_end_mask_0 = const()[name = tensor("op_864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_864_cast_fp16 = slice_by_index(begin = var_864_begin_0, end = var_864_end_0, end_mask = var_864_end_mask_0, x = var_821_cast_fp16)[name = tensor("op_864_cast_fp16")]; tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = var_821_cast_fp16)[name = tensor("op_865_cast_fp16")]; tensor var_866_begin_0 = const()[name = tensor("op_866_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_866_end_0 = const()[name = tensor("op_866_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_866_end_mask_0 = const()[name = tensor("op_866_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_866_cast_fp16 = slice_by_index(begin = var_866_begin_0, end = var_866_end_0, end_mask = var_866_end_mask_0, x = var_821_cast_fp16)[name = tensor("op_866_cast_fp16")]; tensor var_867_begin_0 = const()[name = tensor("op_867_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_867_end_0 = const()[name = tensor("op_867_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_867_end_mask_0 = const()[name = tensor("op_867_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_867_cast_fp16 = slice_by_index(begin = var_867_begin_0, end = var_867_end_0, end_mask = var_867_end_mask_0, x = var_821_cast_fp16)[name = tensor("op_867_cast_fp16")]; tensor var_868_begin_0 = const()[name = tensor("op_868_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_868_end_0 = const()[name = tensor("op_868_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_868_end_mask_0 = const()[name = tensor("op_868_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_868_cast_fp16 = slice_by_index(begin = var_868_begin_0, end = var_868_end_0, end_mask = var_868_end_mask_0, x = var_825_cast_fp16)[name = tensor("op_868_cast_fp16")]; tensor var_869_begin_0 = const()[name = tensor("op_869_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_869_end_0 = const()[name = tensor("op_869_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_869_end_mask_0 = const()[name = tensor("op_869_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = var_869_end_0, end_mask = var_869_end_mask_0, x = var_825_cast_fp16)[name = tensor("op_869_cast_fp16")]; tensor var_870_begin_0 = const()[name = tensor("op_870_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_870_end_0 = const()[name = tensor("op_870_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_870_end_mask_0 = const()[name = tensor("op_870_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_870_cast_fp16 = slice_by_index(begin = var_870_begin_0, end = var_870_end_0, end_mask = var_870_end_mask_0, x = var_825_cast_fp16)[name = tensor("op_870_cast_fp16")]; tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = var_825_cast_fp16)[name = tensor("op_871_cast_fp16")]; tensor var_872_begin_0 = const()[name = tensor("op_872_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_872_end_0 = const()[name = tensor("op_872_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_872_end_mask_0 = const()[name = tensor("op_872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = var_872_end_0, end_mask = var_872_end_mask_0, x = var_825_cast_fp16)[name = tensor("op_872_cast_fp16")]; tensor var_873_begin_0 = const()[name = tensor("op_873_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_873_end_0 = const()[name = tensor("op_873_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_873_end_mask_0 = const()[name = tensor("op_873_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_873_cast_fp16 = slice_by_index(begin = var_873_begin_0, end = var_873_end_0, end_mask = var_873_end_mask_0, x = var_825_cast_fp16)[name = tensor("op_873_cast_fp16")]; tensor var_874_begin_0 = const()[name = tensor("op_874_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_874_end_0 = const()[name = tensor("op_874_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_874_end_mask_0 = const()[name = tensor("op_874_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_874_cast_fp16 = slice_by_index(begin = var_874_begin_0, end = var_874_end_0, end_mask = var_874_end_mask_0, x = var_829_cast_fp16)[name = tensor("op_874_cast_fp16")]; tensor var_875_begin_0 = const()[name = tensor("op_875_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_875_end_0 = const()[name = tensor("op_875_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_875_end_mask_0 = const()[name = tensor("op_875_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_875_cast_fp16 = slice_by_index(begin = var_875_begin_0, end = var_875_end_0, end_mask = var_875_end_mask_0, x = var_829_cast_fp16)[name = tensor("op_875_cast_fp16")]; tensor var_876_begin_0 = const()[name = tensor("op_876_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_876_end_0 = const()[name = tensor("op_876_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_876_end_mask_0 = const()[name = tensor("op_876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_876_cast_fp16 = slice_by_index(begin = var_876_begin_0, end = var_876_end_0, end_mask = var_876_end_mask_0, x = var_829_cast_fp16)[name = tensor("op_876_cast_fp16")]; tensor var_877_begin_0 = const()[name = tensor("op_877_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_877_end_0 = const()[name = tensor("op_877_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_877_end_mask_0 = const()[name = tensor("op_877_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_877_cast_fp16 = slice_by_index(begin = var_877_begin_0, end = var_877_end_0, end_mask = var_877_end_mask_0, x = var_829_cast_fp16)[name = tensor("op_877_cast_fp16")]; tensor var_878_begin_0 = const()[name = tensor("op_878_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_878_end_0 = const()[name = tensor("op_878_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_878_end_mask_0 = const()[name = tensor("op_878_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_878_cast_fp16 = slice_by_index(begin = var_878_begin_0, end = var_878_end_0, end_mask = var_878_end_mask_0, x = var_829_cast_fp16)[name = tensor("op_878_cast_fp16")]; tensor var_879_begin_0 = const()[name = tensor("op_879_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_879_end_0 = const()[name = tensor("op_879_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_879_end_mask_0 = const()[name = tensor("op_879_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_879_cast_fp16 = slice_by_index(begin = var_879_begin_0, end = var_879_end_0, end_mask = var_879_end_mask_0, x = var_829_cast_fp16)[name = tensor("op_879_cast_fp16")]; tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_884_begin_0 = const()[name = tensor("op_884_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_884_end_0 = const()[name = tensor("op_884_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_884_end_mask_0 = const()[name = tensor("op_884_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_4")]; tensor var_884_cast_fp16 = slice_by_index(begin = var_884_begin_0, end = var_884_end_0, end_mask = var_884_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_884_cast_fp16")]; tensor var_888_begin_0 = const()[name = tensor("op_888_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_888_end_0 = const()[name = tensor("op_888_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_888_end_mask_0 = const()[name = tensor("op_888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_888_cast_fp16 = slice_by_index(begin = var_888_begin_0, end = var_888_end_0, end_mask = var_888_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_888_cast_fp16")]; tensor var_892_begin_0 = const()[name = tensor("op_892_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_892_end_0 = const()[name = tensor("op_892_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_892_end_mask_0 = const()[name = tensor("op_892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_892_cast_fp16 = slice_by_index(begin = var_892_begin_0, end = var_892_end_0, end_mask = var_892_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_892_cast_fp16")]; tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_896_cast_fp16")]; tensor var_900_begin_0 = const()[name = tensor("op_900_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_900_end_0 = const()[name = tensor("op_900_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_900_end_mask_0 = const()[name = tensor("op_900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_900_cast_fp16 = slice_by_index(begin = var_900_begin_0, end = var_900_end_0, end_mask = var_900_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_900_cast_fp16")]; tensor var_904_begin_0 = const()[name = tensor("op_904_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_904_end_0 = const()[name = tensor("op_904_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_904_end_mask_0 = const()[name = tensor("op_904_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_904_cast_fp16 = slice_by_index(begin = var_904_begin_0, end = var_904_end_0, end_mask = var_904_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_904_cast_fp16")]; tensor var_908_begin_0 = const()[name = tensor("op_908_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_908_end_0 = const()[name = tensor("op_908_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_908_end_mask_0 = const()[name = tensor("op_908_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_908_cast_fp16 = slice_by_index(begin = var_908_begin_0, end = var_908_end_0, end_mask = var_908_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_908_cast_fp16")]; tensor var_912_begin_0 = const()[name = tensor("op_912_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_912_end_0 = const()[name = tensor("op_912_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_912_end_mask_0 = const()[name = tensor("op_912_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_912_cast_fp16 = slice_by_index(begin = var_912_begin_0, end = var_912_end_0, end_mask = var_912_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_912_cast_fp16")]; tensor var_914_begin_0 = const()[name = tensor("op_914_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_914_end_0 = const()[name = tensor("op_914_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_914_end_mask_0 = const()[name = tensor("op_914_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_914_cast_fp16 = slice_by_index(begin = var_914_begin_0, end = var_914_end_0, end_mask = var_914_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_914_cast_fp16")]; tensor var_918_begin_0 = const()[name = tensor("op_918_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_918_end_0 = const()[name = tensor("op_918_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_918_end_mask_0 = const()[name = tensor("op_918_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_918_cast_fp16 = slice_by_index(begin = var_918_begin_0, end = var_918_end_0, end_mask = var_918_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_918_cast_fp16")]; tensor var_922_begin_0 = const()[name = tensor("op_922_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_922_end_0 = const()[name = tensor("op_922_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_922_end_mask_0 = const()[name = tensor("op_922_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_922_cast_fp16 = slice_by_index(begin = var_922_begin_0, end = var_922_end_0, end_mask = var_922_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_922_cast_fp16")]; tensor var_926_begin_0 = const()[name = tensor("op_926_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_926_end_0 = const()[name = tensor("op_926_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_926_end_mask_0 = const()[name = tensor("op_926_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_926_cast_fp16 = slice_by_index(begin = var_926_begin_0, end = var_926_end_0, end_mask = var_926_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_926_cast_fp16")]; tensor var_930_begin_0 = const()[name = tensor("op_930_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_930_end_0 = const()[name = tensor("op_930_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_930_end_mask_0 = const()[name = tensor("op_930_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_930_cast_fp16 = slice_by_index(begin = var_930_begin_0, end = var_930_end_0, end_mask = var_930_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_930_cast_fp16")]; tensor var_934_begin_0 = const()[name = tensor("op_934_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_934_end_0 = const()[name = tensor("op_934_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_934_end_mask_0 = const()[name = tensor("op_934_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_934_cast_fp16 = slice_by_index(begin = var_934_begin_0, end = var_934_end_0, end_mask = var_934_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_934_cast_fp16")]; tensor var_938_begin_0 = const()[name = tensor("op_938_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_938_end_0 = const()[name = tensor("op_938_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_938_end_mask_0 = const()[name = tensor("op_938_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_938_cast_fp16 = slice_by_index(begin = var_938_begin_0, end = var_938_end_0, end_mask = var_938_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_938_cast_fp16")]; tensor var_942_begin_0 = const()[name = tensor("op_942_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_942_end_0 = const()[name = tensor("op_942_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_942_end_mask_0 = const()[name = tensor("op_942_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_942_cast_fp16 = slice_by_index(begin = var_942_begin_0, end = var_942_end_0, end_mask = var_942_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_942_cast_fp16")]; tensor _SplitHeadsQ__mh_w_97_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_97_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_97_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_97_equation_0, values = (var_884_cast_fp16, var_832_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_97_cast_fp16")]; tensor _SplitHeadsQ__mh_w_99_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_99_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_99_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_99_equation_0, values = (var_884_cast_fp16, var_833_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_99_cast_fp16")]; tensor _SplitHeadsQ__mh_w_101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_101_equation_0, values = (var_884_cast_fp16, var_834_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_103_equation_0, values = (var_884_cast_fp16, var_835_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_105_equation_0, values = (var_884_cast_fp16, var_836_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_107_equation_0, values = (var_884_cast_fp16, var_837_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_109_equation_0, values = (var_888_cast_fp16, var_838_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_111_equation_0, values = (var_888_cast_fp16, var_839_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_113_equation_0, values = (var_888_cast_fp16, var_840_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_115_equation_0, values = (var_888_cast_fp16, var_841_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_117_equation_0, values = (var_888_cast_fp16, var_842_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_119_equation_0, values = (var_888_cast_fp16, var_843_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_121_equation_0, values = (var_892_cast_fp16, var_844_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_123_equation_0, values = (var_892_cast_fp16, var_845_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_125_equation_0, values = (var_892_cast_fp16, var_846_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_127_equation_0, values = (var_892_cast_fp16, var_847_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_129_equation_0, values = (var_892_cast_fp16, var_848_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_131_equation_0, values = (var_892_cast_fp16, var_849_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_133_equation_0, values = (var_896_cast_fp16, var_850_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_135_equation_0, values = (var_896_cast_fp16, var_851_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_137_equation_0, values = (var_896_cast_fp16, var_852_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_139_equation_0, values = (var_896_cast_fp16, var_853_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_141_equation_0, values = (var_896_cast_fp16, var_854_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_143_equation_0, values = (var_896_cast_fp16, var_855_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_145_equation_0, values = (var_900_cast_fp16, var_856_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_147_equation_0, values = (var_900_cast_fp16, var_857_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_149_equation_0, values = (var_900_cast_fp16, var_858_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_151_equation_0, values = (var_900_cast_fp16, var_859_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_153_equation_0, values = (var_900_cast_fp16, var_860_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_155_equation_0, values = (var_900_cast_fp16, var_861_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_157_equation_0, values = (var_904_cast_fp16, var_862_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_159_equation_0, values = (var_904_cast_fp16, var_863_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_161_equation_0, values = (var_904_cast_fp16, var_864_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_163_equation_0, values = (var_904_cast_fp16, var_865_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_165_equation_0, values = (var_904_cast_fp16, var_866_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_167_equation_0, values = (var_904_cast_fp16, var_867_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_169_equation_0, values = (var_908_cast_fp16, var_868_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_171_equation_0, values = (var_908_cast_fp16, var_869_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_173_equation_0, values = (var_908_cast_fp16, var_870_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_175_equation_0, values = (var_908_cast_fp16, var_871_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_177_equation_0, values = (var_908_cast_fp16, var_872_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_179_equation_0, values = (var_908_cast_fp16, var_873_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_181_equation_0, values = (var_912_cast_fp16, var_874_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_183_equation_0, values = (var_912_cast_fp16, var_875_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_185_equation_0, values = (var_912_cast_fp16, var_876_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_187_equation_0, values = (var_912_cast_fp16, var_877_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_189_equation_0, values = (var_912_cast_fp16, var_878_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_191_equation_0, values = (var_912_cast_fp16, var_879_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_191_cast_fp16")]; tensor var_1041_to_fp16 = const()[name = tensor("op_1041_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_97_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_97_cast_fp16, y = var_1041_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; tensor var_1043_to_fp16 = const()[name = tensor("op_1043_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_99_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_99_cast_fp16, y = var_1043_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; tensor var_1045_to_fp16 = const()[name = tensor("op_1045_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_101_cast_fp16, y = var_1045_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; tensor var_1047_to_fp16 = const()[name = tensor("op_1047_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_103_cast_fp16, y = var_1047_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; tensor var_1049_to_fp16 = const()[name = tensor("op_1049_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_105_cast_fp16, y = var_1049_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; tensor var_1051_to_fp16 = const()[name = tensor("op_1051_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_107_cast_fp16, y = var_1051_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; tensor var_1053_to_fp16 = const()[name = tensor("op_1053_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_109_cast_fp16, y = var_1053_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; tensor var_1055_to_fp16 = const()[name = tensor("op_1055_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_111_cast_fp16, y = var_1055_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; tensor var_1057_to_fp16 = const()[name = tensor("op_1057_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_113_cast_fp16, y = var_1057_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; tensor var_1059_to_fp16 = const()[name = tensor("op_1059_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_115_cast_fp16, y = var_1059_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; tensor var_1061_to_fp16 = const()[name = tensor("op_1061_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_117_cast_fp16, y = var_1061_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; tensor var_1063_to_fp16 = const()[name = tensor("op_1063_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_119_cast_fp16, y = var_1063_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; tensor var_1065_to_fp16 = const()[name = tensor("op_1065_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_121_cast_fp16, y = var_1065_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; tensor var_1067_to_fp16 = const()[name = tensor("op_1067_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_123_cast_fp16, y = var_1067_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; tensor var_1069_to_fp16 = const()[name = tensor("op_1069_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_125_cast_fp16, y = var_1069_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; tensor var_1071_to_fp16 = const()[name = tensor("op_1071_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_127_cast_fp16, y = var_1071_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; tensor var_1073_to_fp16 = const()[name = tensor("op_1073_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_129_cast_fp16, y = var_1073_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; tensor var_1075_to_fp16 = const()[name = tensor("op_1075_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_131_cast_fp16, y = var_1075_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; tensor var_1077_to_fp16 = const()[name = tensor("op_1077_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_133_cast_fp16, y = var_1077_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; tensor var_1079_to_fp16 = const()[name = tensor("op_1079_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_135_cast_fp16, y = var_1079_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; tensor var_1081_to_fp16 = const()[name = tensor("op_1081_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_137_cast_fp16, y = var_1081_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; tensor var_1083_to_fp16 = const()[name = tensor("op_1083_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_139_cast_fp16, y = var_1083_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; tensor var_1085_to_fp16 = const()[name = tensor("op_1085_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_141_cast_fp16, y = var_1085_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; tensor var_1087_to_fp16 = const()[name = tensor("op_1087_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_143_cast_fp16, y = var_1087_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; tensor var_1089_to_fp16 = const()[name = tensor("op_1089_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_145_cast_fp16, y = var_1089_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; tensor var_1091_to_fp16 = const()[name = tensor("op_1091_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_147_cast_fp16, y = var_1091_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; tensor var_1093_to_fp16 = const()[name = tensor("op_1093_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_149_cast_fp16, y = var_1093_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; tensor var_1095_to_fp16 = const()[name = tensor("op_1095_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_151_cast_fp16, y = var_1095_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; tensor var_1097_to_fp16 = const()[name = tensor("op_1097_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_153_cast_fp16, y = var_1097_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; tensor var_1099_to_fp16 = const()[name = tensor("op_1099_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_155_cast_fp16, y = var_1099_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; tensor var_1101_to_fp16 = const()[name = tensor("op_1101_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_157_cast_fp16, y = var_1101_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; tensor var_1103_to_fp16 = const()[name = tensor("op_1103_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_159_cast_fp16, y = var_1103_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; tensor var_1105_to_fp16 = const()[name = tensor("op_1105_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_161_cast_fp16, y = var_1105_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; tensor var_1107_to_fp16 = const()[name = tensor("op_1107_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_163_cast_fp16, y = var_1107_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; tensor var_1109_to_fp16 = const()[name = tensor("op_1109_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_165_cast_fp16, y = var_1109_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; tensor var_1111_to_fp16 = const()[name = tensor("op_1111_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_167_cast_fp16, y = var_1111_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; tensor var_1113_to_fp16 = const()[name = tensor("op_1113_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_169_cast_fp16, y = var_1113_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; tensor var_1115_to_fp16 = const()[name = tensor("op_1115_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_171_cast_fp16, y = var_1115_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; tensor var_1117_to_fp16 = const()[name = tensor("op_1117_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_173_cast_fp16, y = var_1117_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; tensor var_1119_to_fp16 = const()[name = tensor("op_1119_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_175_cast_fp16, y = var_1119_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; tensor var_1121_to_fp16 = const()[name = tensor("op_1121_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_177_cast_fp16, y = var_1121_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; tensor var_1123_to_fp16 = const()[name = tensor("op_1123_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_179_cast_fp16, y = var_1123_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; tensor var_1125_to_fp16 = const()[name = tensor("op_1125_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_181_cast_fp16, y = var_1125_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; tensor var_1127_to_fp16 = const()[name = tensor("op_1127_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_183_cast_fp16, y = var_1127_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; tensor var_1129_to_fp16 = const()[name = tensor("op_1129_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_185_cast_fp16, y = var_1129_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; tensor var_1131_to_fp16 = const()[name = tensor("op_1131_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_187_cast_fp16, y = var_1131_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; tensor var_1133_to_fp16 = const()[name = tensor("op_1133_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_189_cast_fp16, y = var_1133_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; tensor var_1135_to_fp16 = const()[name = tensor("op_1135_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_191_cast_fp16, y = var_1135_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; tensor var_1137_cast_fp16 = softmax(axis = var_749, x = aw_chunk_97_cast_fp16)[name = tensor("op_1137_cast_fp16")]; tensor var_1138_cast_fp16 = softmax(axis = var_749, x = aw_chunk_99_cast_fp16)[name = tensor("op_1138_cast_fp16")]; tensor var_1139_cast_fp16 = softmax(axis = var_749, x = aw_chunk_101_cast_fp16)[name = tensor("op_1139_cast_fp16")]; tensor var_1140_cast_fp16 = softmax(axis = var_749, x = aw_chunk_103_cast_fp16)[name = tensor("op_1140_cast_fp16")]; tensor var_1141_cast_fp16 = softmax(axis = var_749, x = aw_chunk_105_cast_fp16)[name = tensor("op_1141_cast_fp16")]; tensor var_1142_cast_fp16 = softmax(axis = var_749, x = aw_chunk_107_cast_fp16)[name = tensor("op_1142_cast_fp16")]; tensor var_1143_cast_fp16 = softmax(axis = var_749, x = aw_chunk_109_cast_fp16)[name = tensor("op_1143_cast_fp16")]; tensor var_1144_cast_fp16 = softmax(axis = var_749, x = aw_chunk_111_cast_fp16)[name = tensor("op_1144_cast_fp16")]; tensor var_1145_cast_fp16 = softmax(axis = var_749, x = aw_chunk_113_cast_fp16)[name = tensor("op_1145_cast_fp16")]; tensor var_1146_cast_fp16 = softmax(axis = var_749, x = aw_chunk_115_cast_fp16)[name = tensor("op_1146_cast_fp16")]; tensor var_1147_cast_fp16 = softmax(axis = var_749, x = aw_chunk_117_cast_fp16)[name = tensor("op_1147_cast_fp16")]; tensor var_1148_cast_fp16 = softmax(axis = var_749, x = aw_chunk_119_cast_fp16)[name = tensor("op_1148_cast_fp16")]; tensor var_1149_cast_fp16 = softmax(axis = var_749, x = aw_chunk_121_cast_fp16)[name = tensor("op_1149_cast_fp16")]; tensor var_1150_cast_fp16 = softmax(axis = var_749, x = aw_chunk_123_cast_fp16)[name = tensor("op_1150_cast_fp16")]; tensor var_1151_cast_fp16 = softmax(axis = var_749, x = aw_chunk_125_cast_fp16)[name = tensor("op_1151_cast_fp16")]; tensor var_1152_cast_fp16 = softmax(axis = var_749, x = aw_chunk_127_cast_fp16)[name = tensor("op_1152_cast_fp16")]; tensor var_1153_cast_fp16 = softmax(axis = var_749, x = aw_chunk_129_cast_fp16)[name = tensor("op_1153_cast_fp16")]; tensor var_1154_cast_fp16 = softmax(axis = var_749, x = aw_chunk_131_cast_fp16)[name = tensor("op_1154_cast_fp16")]; tensor var_1155_cast_fp16 = softmax(axis = var_749, x = aw_chunk_133_cast_fp16)[name = tensor("op_1155_cast_fp16")]; tensor var_1156_cast_fp16 = softmax(axis = var_749, x = aw_chunk_135_cast_fp16)[name = tensor("op_1156_cast_fp16")]; tensor var_1157_cast_fp16 = softmax(axis = var_749, x = aw_chunk_137_cast_fp16)[name = tensor("op_1157_cast_fp16")]; tensor var_1158_cast_fp16 = softmax(axis = var_749, x = aw_chunk_139_cast_fp16)[name = tensor("op_1158_cast_fp16")]; tensor var_1159_cast_fp16 = softmax(axis = var_749, x = aw_chunk_141_cast_fp16)[name = tensor("op_1159_cast_fp16")]; tensor var_1160_cast_fp16 = softmax(axis = var_749, x = aw_chunk_143_cast_fp16)[name = tensor("op_1160_cast_fp16")]; tensor var_1161_cast_fp16 = softmax(axis = var_749, x = aw_chunk_145_cast_fp16)[name = tensor("op_1161_cast_fp16")]; tensor var_1162_cast_fp16 = softmax(axis = var_749, x = aw_chunk_147_cast_fp16)[name = tensor("op_1162_cast_fp16")]; tensor var_1163_cast_fp16 = softmax(axis = var_749, x = aw_chunk_149_cast_fp16)[name = tensor("op_1163_cast_fp16")]; tensor var_1164_cast_fp16 = softmax(axis = var_749, x = aw_chunk_151_cast_fp16)[name = tensor("op_1164_cast_fp16")]; tensor var_1165_cast_fp16 = softmax(axis = var_749, x = aw_chunk_153_cast_fp16)[name = tensor("op_1165_cast_fp16")]; tensor var_1166_cast_fp16 = softmax(axis = var_749, x = aw_chunk_155_cast_fp16)[name = tensor("op_1166_cast_fp16")]; tensor var_1167_cast_fp16 = softmax(axis = var_749, x = aw_chunk_157_cast_fp16)[name = tensor("op_1167_cast_fp16")]; tensor var_1168_cast_fp16 = softmax(axis = var_749, x = aw_chunk_159_cast_fp16)[name = tensor("op_1168_cast_fp16")]; tensor var_1169_cast_fp16 = softmax(axis = var_749, x = aw_chunk_161_cast_fp16)[name = tensor("op_1169_cast_fp16")]; tensor var_1170_cast_fp16 = softmax(axis = var_749, x = aw_chunk_163_cast_fp16)[name = tensor("op_1170_cast_fp16")]; tensor var_1171_cast_fp16 = softmax(axis = var_749, x = aw_chunk_165_cast_fp16)[name = tensor("op_1171_cast_fp16")]; tensor var_1172_cast_fp16 = softmax(axis = var_749, x = aw_chunk_167_cast_fp16)[name = tensor("op_1172_cast_fp16")]; tensor var_1173_cast_fp16 = softmax(axis = var_749, x = aw_chunk_169_cast_fp16)[name = tensor("op_1173_cast_fp16")]; tensor var_1174_cast_fp16 = softmax(axis = var_749, x = aw_chunk_171_cast_fp16)[name = tensor("op_1174_cast_fp16")]; tensor var_1175_cast_fp16 = softmax(axis = var_749, x = aw_chunk_173_cast_fp16)[name = tensor("op_1175_cast_fp16")]; tensor var_1176_cast_fp16 = softmax(axis = var_749, x = aw_chunk_175_cast_fp16)[name = tensor("op_1176_cast_fp16")]; tensor var_1177_cast_fp16 = softmax(axis = var_749, x = aw_chunk_177_cast_fp16)[name = tensor("op_1177_cast_fp16")]; tensor var_1178_cast_fp16 = softmax(axis = var_749, x = aw_chunk_179_cast_fp16)[name = tensor("op_1178_cast_fp16")]; tensor var_1179_cast_fp16 = softmax(axis = var_749, x = aw_chunk_181_cast_fp16)[name = tensor("op_1179_cast_fp16")]; tensor var_1180_cast_fp16 = softmax(axis = var_749, x = aw_chunk_183_cast_fp16)[name = tensor("op_1180_cast_fp16")]; tensor var_1181_cast_fp16 = softmax(axis = var_749, x = aw_chunk_185_cast_fp16)[name = tensor("op_1181_cast_fp16")]; tensor var_1182_cast_fp16 = softmax(axis = var_749, x = aw_chunk_187_cast_fp16)[name = tensor("op_1182_cast_fp16")]; tensor var_1183_cast_fp16 = softmax(axis = var_749, x = aw_chunk_189_cast_fp16)[name = tensor("op_1183_cast_fp16")]; tensor var_1184_cast_fp16 = softmax(axis = var_749, x = aw_chunk_191_cast_fp16)[name = tensor("op_1184_cast_fp16")]; tensor var_1186_equation_0 = const()[name = tensor("op_1186_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1186_cast_fp16 = einsum(equation = var_1186_equation_0, values = (var_914_cast_fp16, var_1137_cast_fp16))[name = tensor("op_1186_cast_fp16")]; tensor var_1188_equation_0 = const()[name = tensor("op_1188_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1188_cast_fp16 = einsum(equation = var_1188_equation_0, values = (var_914_cast_fp16, var_1138_cast_fp16))[name = tensor("op_1188_cast_fp16")]; tensor var_1190_equation_0 = const()[name = tensor("op_1190_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1190_cast_fp16 = einsum(equation = var_1190_equation_0, values = (var_914_cast_fp16, var_1139_cast_fp16))[name = tensor("op_1190_cast_fp16")]; tensor var_1192_equation_0 = const()[name = tensor("op_1192_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1192_cast_fp16 = einsum(equation = var_1192_equation_0, values = (var_914_cast_fp16, var_1140_cast_fp16))[name = tensor("op_1192_cast_fp16")]; tensor var_1194_equation_0 = const()[name = tensor("op_1194_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1194_cast_fp16 = einsum(equation = var_1194_equation_0, values = (var_914_cast_fp16, var_1141_cast_fp16))[name = tensor("op_1194_cast_fp16")]; tensor var_1196_equation_0 = const()[name = tensor("op_1196_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1196_cast_fp16 = einsum(equation = var_1196_equation_0, values = (var_914_cast_fp16, var_1142_cast_fp16))[name = tensor("op_1196_cast_fp16")]; tensor var_1198_equation_0 = const()[name = tensor("op_1198_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1198_cast_fp16 = einsum(equation = var_1198_equation_0, values = (var_918_cast_fp16, var_1143_cast_fp16))[name = tensor("op_1198_cast_fp16")]; tensor var_1200_equation_0 = const()[name = tensor("op_1200_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1200_cast_fp16 = einsum(equation = var_1200_equation_0, values = (var_918_cast_fp16, var_1144_cast_fp16))[name = tensor("op_1200_cast_fp16")]; tensor var_1202_equation_0 = const()[name = tensor("op_1202_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1202_cast_fp16 = einsum(equation = var_1202_equation_0, values = (var_918_cast_fp16, var_1145_cast_fp16))[name = tensor("op_1202_cast_fp16")]; tensor var_1204_equation_0 = const()[name = tensor("op_1204_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1204_cast_fp16 = einsum(equation = var_1204_equation_0, values = (var_918_cast_fp16, var_1146_cast_fp16))[name = tensor("op_1204_cast_fp16")]; tensor var_1206_equation_0 = const()[name = tensor("op_1206_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1206_cast_fp16 = einsum(equation = var_1206_equation_0, values = (var_918_cast_fp16, var_1147_cast_fp16))[name = tensor("op_1206_cast_fp16")]; tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_918_cast_fp16, var_1148_cast_fp16))[name = tensor("op_1208_cast_fp16")]; tensor var_1210_equation_0 = const()[name = tensor("op_1210_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1210_cast_fp16 = einsum(equation = var_1210_equation_0, values = (var_922_cast_fp16, var_1149_cast_fp16))[name = tensor("op_1210_cast_fp16")]; tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_922_cast_fp16, var_1150_cast_fp16))[name = tensor("op_1212_cast_fp16")]; tensor var_1214_equation_0 = const()[name = tensor("op_1214_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1214_cast_fp16 = einsum(equation = var_1214_equation_0, values = (var_922_cast_fp16, var_1151_cast_fp16))[name = tensor("op_1214_cast_fp16")]; tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_922_cast_fp16, var_1152_cast_fp16))[name = tensor("op_1216_cast_fp16")]; tensor var_1218_equation_0 = const()[name = tensor("op_1218_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1218_cast_fp16 = einsum(equation = var_1218_equation_0, values = (var_922_cast_fp16, var_1153_cast_fp16))[name = tensor("op_1218_cast_fp16")]; tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_922_cast_fp16, var_1154_cast_fp16))[name = tensor("op_1220_cast_fp16")]; tensor var_1222_equation_0 = const()[name = tensor("op_1222_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1222_cast_fp16 = einsum(equation = var_1222_equation_0, values = (var_926_cast_fp16, var_1155_cast_fp16))[name = tensor("op_1222_cast_fp16")]; tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_926_cast_fp16, var_1156_cast_fp16))[name = tensor("op_1224_cast_fp16")]; tensor var_1226_equation_0 = const()[name = tensor("op_1226_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1226_cast_fp16 = einsum(equation = var_1226_equation_0, values = (var_926_cast_fp16, var_1157_cast_fp16))[name = tensor("op_1226_cast_fp16")]; tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_926_cast_fp16, var_1158_cast_fp16))[name = tensor("op_1228_cast_fp16")]; tensor var_1230_equation_0 = const()[name = tensor("op_1230_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1230_cast_fp16 = einsum(equation = var_1230_equation_0, values = (var_926_cast_fp16, var_1159_cast_fp16))[name = tensor("op_1230_cast_fp16")]; tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_926_cast_fp16, var_1160_cast_fp16))[name = tensor("op_1232_cast_fp16")]; tensor var_1234_equation_0 = const()[name = tensor("op_1234_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1234_cast_fp16 = einsum(equation = var_1234_equation_0, values = (var_930_cast_fp16, var_1161_cast_fp16))[name = tensor("op_1234_cast_fp16")]; tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_930_cast_fp16, var_1162_cast_fp16))[name = tensor("op_1236_cast_fp16")]; tensor var_1238_equation_0 = const()[name = tensor("op_1238_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_930_cast_fp16, var_1163_cast_fp16))[name = tensor("op_1238_cast_fp16")]; tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_930_cast_fp16, var_1164_cast_fp16))[name = tensor("op_1240_cast_fp16")]; tensor var_1242_equation_0 = const()[name = tensor("op_1242_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_930_cast_fp16, var_1165_cast_fp16))[name = tensor("op_1242_cast_fp16")]; tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_930_cast_fp16, var_1166_cast_fp16))[name = tensor("op_1244_cast_fp16")]; tensor var_1246_equation_0 = const()[name = tensor("op_1246_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_934_cast_fp16, var_1167_cast_fp16))[name = tensor("op_1246_cast_fp16")]; tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_934_cast_fp16, var_1168_cast_fp16))[name = tensor("op_1248_cast_fp16")]; tensor var_1250_equation_0 = const()[name = tensor("op_1250_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_934_cast_fp16, var_1169_cast_fp16))[name = tensor("op_1250_cast_fp16")]; tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_934_cast_fp16, var_1170_cast_fp16))[name = tensor("op_1252_cast_fp16")]; tensor var_1254_equation_0 = const()[name = tensor("op_1254_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1254_cast_fp16 = einsum(equation = var_1254_equation_0, values = (var_934_cast_fp16, var_1171_cast_fp16))[name = tensor("op_1254_cast_fp16")]; tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_934_cast_fp16, var_1172_cast_fp16))[name = tensor("op_1256_cast_fp16")]; tensor var_1258_equation_0 = const()[name = tensor("op_1258_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1258_cast_fp16 = einsum(equation = var_1258_equation_0, values = (var_938_cast_fp16, var_1173_cast_fp16))[name = tensor("op_1258_cast_fp16")]; tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_938_cast_fp16, var_1174_cast_fp16))[name = tensor("op_1260_cast_fp16")]; tensor var_1262_equation_0 = const()[name = tensor("op_1262_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1262_cast_fp16 = einsum(equation = var_1262_equation_0, values = (var_938_cast_fp16, var_1175_cast_fp16))[name = tensor("op_1262_cast_fp16")]; tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_938_cast_fp16, var_1176_cast_fp16))[name = tensor("op_1264_cast_fp16")]; tensor var_1266_equation_0 = const()[name = tensor("op_1266_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1266_cast_fp16 = einsum(equation = var_1266_equation_0, values = (var_938_cast_fp16, var_1177_cast_fp16))[name = tensor("op_1266_cast_fp16")]; tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_938_cast_fp16, var_1178_cast_fp16))[name = tensor("op_1268_cast_fp16")]; tensor var_1270_equation_0 = const()[name = tensor("op_1270_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1270_cast_fp16 = einsum(equation = var_1270_equation_0, values = (var_942_cast_fp16, var_1179_cast_fp16))[name = tensor("op_1270_cast_fp16")]; tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_942_cast_fp16, var_1180_cast_fp16))[name = tensor("op_1272_cast_fp16")]; tensor var_1274_equation_0 = const()[name = tensor("op_1274_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1274_cast_fp16 = einsum(equation = var_1274_equation_0, values = (var_942_cast_fp16, var_1181_cast_fp16))[name = tensor("op_1274_cast_fp16")]; tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_942_cast_fp16, var_1182_cast_fp16))[name = tensor("op_1276_cast_fp16")]; tensor var_1278_equation_0 = const()[name = tensor("op_1278_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1278_cast_fp16 = einsum(equation = var_1278_equation_0, values = (var_942_cast_fp16, var_1183_cast_fp16))[name = tensor("op_1278_cast_fp16")]; tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_942_cast_fp16, var_1184_cast_fp16))[name = tensor("op_1280_cast_fp16")]; tensor var_1282_interleave_0 = const()[name = tensor("op_1282_interleave_0"), val = tensor(false)]; tensor var_1282_cast_fp16 = concat(axis = var_736, interleave = var_1282_interleave_0, values = (var_1186_cast_fp16, var_1188_cast_fp16, var_1190_cast_fp16, var_1192_cast_fp16, var_1194_cast_fp16, var_1196_cast_fp16))[name = tensor("op_1282_cast_fp16")]; tensor var_1284_interleave_0 = const()[name = tensor("op_1284_interleave_0"), val = tensor(false)]; tensor var_1284_cast_fp16 = concat(axis = var_736, interleave = var_1284_interleave_0, values = (var_1198_cast_fp16, var_1200_cast_fp16, var_1202_cast_fp16, var_1204_cast_fp16, var_1206_cast_fp16, var_1208_cast_fp16))[name = tensor("op_1284_cast_fp16")]; tensor var_1286_interleave_0 = const()[name = tensor("op_1286_interleave_0"), val = tensor(false)]; tensor var_1286_cast_fp16 = concat(axis = var_736, interleave = var_1286_interleave_0, values = (var_1210_cast_fp16, var_1212_cast_fp16, var_1214_cast_fp16, var_1216_cast_fp16, var_1218_cast_fp16, var_1220_cast_fp16))[name = tensor("op_1286_cast_fp16")]; tensor var_1288_interleave_0 = const()[name = tensor("op_1288_interleave_0"), val = tensor(false)]; tensor var_1288_cast_fp16 = concat(axis = var_736, interleave = var_1288_interleave_0, values = (var_1222_cast_fp16, var_1224_cast_fp16, var_1226_cast_fp16, var_1228_cast_fp16, var_1230_cast_fp16, var_1232_cast_fp16))[name = tensor("op_1288_cast_fp16")]; tensor var_1290_interleave_0 = const()[name = tensor("op_1290_interleave_0"), val = tensor(false)]; tensor var_1290_cast_fp16 = concat(axis = var_736, interleave = var_1290_interleave_0, values = (var_1234_cast_fp16, var_1236_cast_fp16, var_1238_cast_fp16, var_1240_cast_fp16, var_1242_cast_fp16, var_1244_cast_fp16))[name = tensor("op_1290_cast_fp16")]; tensor var_1292_interleave_0 = const()[name = tensor("op_1292_interleave_0"), val = tensor(false)]; tensor var_1292_cast_fp16 = concat(axis = var_736, interleave = var_1292_interleave_0, values = (var_1246_cast_fp16, var_1248_cast_fp16, var_1250_cast_fp16, var_1252_cast_fp16, var_1254_cast_fp16, var_1256_cast_fp16))[name = tensor("op_1292_cast_fp16")]; tensor var_1294_interleave_0 = const()[name = tensor("op_1294_interleave_0"), val = tensor(false)]; tensor var_1294_cast_fp16 = concat(axis = var_736, interleave = var_1294_interleave_0, values = (var_1258_cast_fp16, var_1260_cast_fp16, var_1262_cast_fp16, var_1264_cast_fp16, var_1266_cast_fp16, var_1268_cast_fp16))[name = tensor("op_1294_cast_fp16")]; tensor var_1296_interleave_0 = const()[name = tensor("op_1296_interleave_0"), val = tensor(false)]; tensor var_1296_cast_fp16 = concat(axis = var_736, interleave = var_1296_interleave_0, values = (var_1270_cast_fp16, var_1272_cast_fp16, var_1274_cast_fp16, var_1276_cast_fp16, var_1278_cast_fp16, var_1280_cast_fp16))[name = tensor("op_1296_cast_fp16")]; tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; tensor input_9_cast_fp16 = concat(axis = var_749, interleave = input_9_interleave_0, values = (var_1282_cast_fp16, var_1284_cast_fp16, var_1286_cast_fp16, var_1288_cast_fp16, var_1290_cast_fp16, var_1292_cast_fp16, var_1294_cast_fp16, var_1296_cast_fp16))[name = tensor("input_9_cast_fp16")]; tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("valid")]; tensor obj_7_strides_0 = const()[name = tensor("obj_7_strides_0"), val = tensor([1, 1])]; tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_7_dilations_0 = const()[name = tensor("obj_7_dilations_0"), val = tensor([1, 1])]; tensor obj_7_groups_0 = const()[name = tensor("obj_7_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11241344)))]; tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11765696)))]; tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; tensor var_1315_to_fp16 = const()[name = tensor("op_1315_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_1315_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11766784)))]; tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11767872)))]; tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("valid")]; tensor input_13_strides_0 = const()[name = tensor("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = tensor("input_13_dilations_0"), val = tensor([1, 1])]; tensor input_13_groups_0 = const()[name = tensor("input_13_groups_0"), val = tensor(1)]; tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11768960)))]; tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13866176)))]; tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("valid")]; tensor hidden_states_7_strides_0 = const()[name = tensor("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = tensor("hidden_states_7_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_7_groups_0 = const()[name = tensor("hidden_states_7_groups_0"), val = tensor(1)]; tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13870336)))]; tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15967552)))]; tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; tensor var_1347 = const()[name = tensor("op_1347"), val = tensor(3)]; tensor var_1360 = const()[name = tensor("op_1360"), val = tensor(1)]; tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; tensor var_1377_to_fp16 = const()[name = tensor("op_1377_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_1377_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15968640)))]; tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15969728)))]; tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("valid")]; tensor query_5_strides_0 = const()[name = tensor("query_5_strides_0"), val = tensor([1, 1])]; tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_5_dilations_0 = const()[name = tensor("query_5_dilations_0"), val = tensor([1, 1])]; tensor query_5_groups_0 = const()[name = tensor("query_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15970816)))]; tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16495168)))]; tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("valid")]; tensor key_5_strides_0 = const()[name = tensor("key_5_strides_0"), val = tensor([1, 1])]; tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_5_dilations_0 = const()[name = tensor("key_5_dilations_0"), val = tensor([1, 1])]; tensor key_5_groups_0 = const()[name = tensor("key_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16496256)))]; tensor key_5_cast_fp16 = conv(dilations = key_5_dilations_0, groups = key_5_groups_0, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("valid")]; tensor value_5_strides_0 = const()[name = tensor("value_5_strides_0"), val = tensor([1, 1])]; tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_5_dilations_0 = const()[name = tensor("value_5_dilations_0"), val = tensor([1, 1])]; tensor value_5_groups_0 = const()[name = tensor("value_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17020608)))]; tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17544960)))]; tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = value_5_dilations_0, groups = value_5_groups_0, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; tensor var_1412_begin_0 = const()[name = tensor("op_1412_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1412_end_0 = const()[name = tensor("op_1412_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1412_end_mask_0 = const()[name = tensor("op_1412_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1412_cast_fp16 = slice_by_index(begin = var_1412_begin_0, end = var_1412_end_0, end_mask = var_1412_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1412_cast_fp16")]; tensor var_1416_begin_0 = const()[name = tensor("op_1416_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1416_end_0 = const()[name = tensor("op_1416_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1416_end_mask_0 = const()[name = tensor("op_1416_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1416_cast_fp16 = slice_by_index(begin = var_1416_begin_0, end = var_1416_end_0, end_mask = var_1416_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1416_cast_fp16")]; tensor var_1420_begin_0 = const()[name = tensor("op_1420_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1420_end_0 = const()[name = tensor("op_1420_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1420_end_mask_0 = const()[name = tensor("op_1420_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1420_cast_fp16 = slice_by_index(begin = var_1420_begin_0, end = var_1420_end_0, end_mask = var_1420_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1420_cast_fp16")]; tensor var_1424_begin_0 = const()[name = tensor("op_1424_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1424_end_0 = const()[name = tensor("op_1424_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1424_end_mask_0 = const()[name = tensor("op_1424_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1424_cast_fp16 = slice_by_index(begin = var_1424_begin_0, end = var_1424_end_0, end_mask = var_1424_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1424_cast_fp16")]; tensor var_1428_begin_0 = const()[name = tensor("op_1428_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1428_end_0 = const()[name = tensor("op_1428_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1428_end_mask_0 = const()[name = tensor("op_1428_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1428_cast_fp16 = slice_by_index(begin = var_1428_begin_0, end = var_1428_end_0, end_mask = var_1428_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1428_cast_fp16")]; tensor var_1432_begin_0 = const()[name = tensor("op_1432_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1432_end_0 = const()[name = tensor("op_1432_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1432_end_mask_0 = const()[name = tensor("op_1432_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1432_cast_fp16 = slice_by_index(begin = var_1432_begin_0, end = var_1432_end_0, end_mask = var_1432_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1432_cast_fp16")]; tensor var_1436_begin_0 = const()[name = tensor("op_1436_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1436_end_0 = const()[name = tensor("op_1436_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1436_end_mask_0 = const()[name = tensor("op_1436_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1436_cast_fp16 = slice_by_index(begin = var_1436_begin_0, end = var_1436_end_0, end_mask = var_1436_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1436_cast_fp16")]; tensor var_1440_begin_0 = const()[name = tensor("op_1440_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1440_end_0 = const()[name = tensor("op_1440_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1440_end_mask_0 = const()[name = tensor("op_1440_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1440_cast_fp16 = slice_by_index(begin = var_1440_begin_0, end = var_1440_end_0, end_mask = var_1440_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1440_cast_fp16")]; tensor var_1443_begin_0 = const()[name = tensor("op_1443_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1443_end_0 = const()[name = tensor("op_1443_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1443_end_mask_0 = const()[name = tensor("op_1443_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1443_cast_fp16 = slice_by_index(begin = var_1443_begin_0, end = var_1443_end_0, end_mask = var_1443_end_mask_0, x = var_1412_cast_fp16)[name = tensor("op_1443_cast_fp16")]; tensor var_1444_begin_0 = const()[name = tensor("op_1444_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1444_end_0 = const()[name = tensor("op_1444_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1444_end_mask_0 = const()[name = tensor("op_1444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1444_cast_fp16 = slice_by_index(begin = var_1444_begin_0, end = var_1444_end_0, end_mask = var_1444_end_mask_0, x = var_1412_cast_fp16)[name = tensor("op_1444_cast_fp16")]; tensor var_1445_begin_0 = const()[name = tensor("op_1445_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1445_end_0 = const()[name = tensor("op_1445_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1445_end_mask_0 = const()[name = tensor("op_1445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1445_cast_fp16 = slice_by_index(begin = var_1445_begin_0, end = var_1445_end_0, end_mask = var_1445_end_mask_0, x = var_1412_cast_fp16)[name = tensor("op_1445_cast_fp16")]; tensor var_1446_begin_0 = const()[name = tensor("op_1446_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1446_end_0 = const()[name = tensor("op_1446_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1446_end_mask_0 = const()[name = tensor("op_1446_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1446_cast_fp16 = slice_by_index(begin = var_1446_begin_0, end = var_1446_end_0, end_mask = var_1446_end_mask_0, x = var_1412_cast_fp16)[name = tensor("op_1446_cast_fp16")]; tensor var_1447_begin_0 = const()[name = tensor("op_1447_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1447_end_0 = const()[name = tensor("op_1447_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1447_end_mask_0 = const()[name = tensor("op_1447_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1447_cast_fp16 = slice_by_index(begin = var_1447_begin_0, end = var_1447_end_0, end_mask = var_1447_end_mask_0, x = var_1412_cast_fp16)[name = tensor("op_1447_cast_fp16")]; tensor var_1448_begin_0 = const()[name = tensor("op_1448_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1448_end_0 = const()[name = tensor("op_1448_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1448_end_mask_0 = const()[name = tensor("op_1448_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1448_cast_fp16 = slice_by_index(begin = var_1448_begin_0, end = var_1448_end_0, end_mask = var_1448_end_mask_0, x = var_1412_cast_fp16)[name = tensor("op_1448_cast_fp16")]; tensor var_1449_begin_0 = const()[name = tensor("op_1449_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1449_end_0 = const()[name = tensor("op_1449_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1449_end_mask_0 = const()[name = tensor("op_1449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1449_cast_fp16 = slice_by_index(begin = var_1449_begin_0, end = var_1449_end_0, end_mask = var_1449_end_mask_0, x = var_1416_cast_fp16)[name = tensor("op_1449_cast_fp16")]; tensor var_1450_begin_0 = const()[name = tensor("op_1450_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1450_end_0 = const()[name = tensor("op_1450_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1450_end_mask_0 = const()[name = tensor("op_1450_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1450_cast_fp16 = slice_by_index(begin = var_1450_begin_0, end = var_1450_end_0, end_mask = var_1450_end_mask_0, x = var_1416_cast_fp16)[name = tensor("op_1450_cast_fp16")]; tensor var_1451_begin_0 = const()[name = tensor("op_1451_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1451_end_0 = const()[name = tensor("op_1451_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1451_end_mask_0 = const()[name = tensor("op_1451_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1451_cast_fp16 = slice_by_index(begin = var_1451_begin_0, end = var_1451_end_0, end_mask = var_1451_end_mask_0, x = var_1416_cast_fp16)[name = tensor("op_1451_cast_fp16")]; tensor var_1452_begin_0 = const()[name = tensor("op_1452_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1452_end_0 = const()[name = tensor("op_1452_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1452_end_mask_0 = const()[name = tensor("op_1452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1452_cast_fp16 = slice_by_index(begin = var_1452_begin_0, end = var_1452_end_0, end_mask = var_1452_end_mask_0, x = var_1416_cast_fp16)[name = tensor("op_1452_cast_fp16")]; tensor var_1453_begin_0 = const()[name = tensor("op_1453_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1453_end_0 = const()[name = tensor("op_1453_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1453_end_mask_0 = const()[name = tensor("op_1453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = var_1453_end_0, end_mask = var_1453_end_mask_0, x = var_1416_cast_fp16)[name = tensor("op_1453_cast_fp16")]; tensor var_1454_begin_0 = const()[name = tensor("op_1454_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1454_end_0 = const()[name = tensor("op_1454_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1454_end_mask_0 = const()[name = tensor("op_1454_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = var_1416_cast_fp16)[name = tensor("op_1454_cast_fp16")]; tensor var_1455_begin_0 = const()[name = tensor("op_1455_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1455_end_0 = const()[name = tensor("op_1455_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1455_end_mask_0 = const()[name = tensor("op_1455_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1455_cast_fp16 = slice_by_index(begin = var_1455_begin_0, end = var_1455_end_0, end_mask = var_1455_end_mask_0, x = var_1420_cast_fp16)[name = tensor("op_1455_cast_fp16")]; tensor var_1456_begin_0 = const()[name = tensor("op_1456_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1456_end_0 = const()[name = tensor("op_1456_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1456_end_mask_0 = const()[name = tensor("op_1456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1456_cast_fp16 = slice_by_index(begin = var_1456_begin_0, end = var_1456_end_0, end_mask = var_1456_end_mask_0, x = var_1420_cast_fp16)[name = tensor("op_1456_cast_fp16")]; tensor var_1457_begin_0 = const()[name = tensor("op_1457_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1457_end_0 = const()[name = tensor("op_1457_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1457_end_mask_0 = const()[name = tensor("op_1457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1457_cast_fp16 = slice_by_index(begin = var_1457_begin_0, end = var_1457_end_0, end_mask = var_1457_end_mask_0, x = var_1420_cast_fp16)[name = tensor("op_1457_cast_fp16")]; tensor var_1458_begin_0 = const()[name = tensor("op_1458_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1458_end_0 = const()[name = tensor("op_1458_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1458_end_mask_0 = const()[name = tensor("op_1458_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1458_cast_fp16 = slice_by_index(begin = var_1458_begin_0, end = var_1458_end_0, end_mask = var_1458_end_mask_0, x = var_1420_cast_fp16)[name = tensor("op_1458_cast_fp16")]; tensor var_1459_begin_0 = const()[name = tensor("op_1459_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1459_end_0 = const()[name = tensor("op_1459_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1459_end_mask_0 = const()[name = tensor("op_1459_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1459_cast_fp16 = slice_by_index(begin = var_1459_begin_0, end = var_1459_end_0, end_mask = var_1459_end_mask_0, x = var_1420_cast_fp16)[name = tensor("op_1459_cast_fp16")]; tensor var_1460_begin_0 = const()[name = tensor("op_1460_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1460_end_0 = const()[name = tensor("op_1460_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1460_end_mask_0 = const()[name = tensor("op_1460_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1460_cast_fp16 = slice_by_index(begin = var_1460_begin_0, end = var_1460_end_0, end_mask = var_1460_end_mask_0, x = var_1420_cast_fp16)[name = tensor("op_1460_cast_fp16")]; tensor var_1461_begin_0 = const()[name = tensor("op_1461_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1461_end_0 = const()[name = tensor("op_1461_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1461_end_mask_0 = const()[name = tensor("op_1461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1461_cast_fp16 = slice_by_index(begin = var_1461_begin_0, end = var_1461_end_0, end_mask = var_1461_end_mask_0, x = var_1424_cast_fp16)[name = tensor("op_1461_cast_fp16")]; tensor var_1462_begin_0 = const()[name = tensor("op_1462_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1462_end_0 = const()[name = tensor("op_1462_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1462_end_mask_0 = const()[name = tensor("op_1462_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1462_cast_fp16 = slice_by_index(begin = var_1462_begin_0, end = var_1462_end_0, end_mask = var_1462_end_mask_0, x = var_1424_cast_fp16)[name = tensor("op_1462_cast_fp16")]; tensor var_1463_begin_0 = const()[name = tensor("op_1463_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1463_end_0 = const()[name = tensor("op_1463_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1463_end_mask_0 = const()[name = tensor("op_1463_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1463_cast_fp16 = slice_by_index(begin = var_1463_begin_0, end = var_1463_end_0, end_mask = var_1463_end_mask_0, x = var_1424_cast_fp16)[name = tensor("op_1463_cast_fp16")]; tensor var_1464_begin_0 = const()[name = tensor("op_1464_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1464_end_0 = const()[name = tensor("op_1464_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1464_end_mask_0 = const()[name = tensor("op_1464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1464_cast_fp16 = slice_by_index(begin = var_1464_begin_0, end = var_1464_end_0, end_mask = var_1464_end_mask_0, x = var_1424_cast_fp16)[name = tensor("op_1464_cast_fp16")]; tensor var_1465_begin_0 = const()[name = tensor("op_1465_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1465_end_0 = const()[name = tensor("op_1465_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1465_end_mask_0 = const()[name = tensor("op_1465_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1465_cast_fp16 = slice_by_index(begin = var_1465_begin_0, end = var_1465_end_0, end_mask = var_1465_end_mask_0, x = var_1424_cast_fp16)[name = tensor("op_1465_cast_fp16")]; tensor var_1466_begin_0 = const()[name = tensor("op_1466_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1466_end_0 = const()[name = tensor("op_1466_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1466_end_mask_0 = const()[name = tensor("op_1466_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1466_cast_fp16 = slice_by_index(begin = var_1466_begin_0, end = var_1466_end_0, end_mask = var_1466_end_mask_0, x = var_1424_cast_fp16)[name = tensor("op_1466_cast_fp16")]; tensor var_1467_begin_0 = const()[name = tensor("op_1467_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1467_end_0 = const()[name = tensor("op_1467_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1467_end_mask_0 = const()[name = tensor("op_1467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1467_cast_fp16 = slice_by_index(begin = var_1467_begin_0, end = var_1467_end_0, end_mask = var_1467_end_mask_0, x = var_1428_cast_fp16)[name = tensor("op_1467_cast_fp16")]; tensor var_1468_begin_0 = const()[name = tensor("op_1468_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1468_end_0 = const()[name = tensor("op_1468_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1468_end_mask_0 = const()[name = tensor("op_1468_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1468_cast_fp16 = slice_by_index(begin = var_1468_begin_0, end = var_1468_end_0, end_mask = var_1468_end_mask_0, x = var_1428_cast_fp16)[name = tensor("op_1468_cast_fp16")]; tensor var_1469_begin_0 = const()[name = tensor("op_1469_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1469_end_0 = const()[name = tensor("op_1469_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1469_end_mask_0 = const()[name = tensor("op_1469_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1469_cast_fp16 = slice_by_index(begin = var_1469_begin_0, end = var_1469_end_0, end_mask = var_1469_end_mask_0, x = var_1428_cast_fp16)[name = tensor("op_1469_cast_fp16")]; tensor var_1470_begin_0 = const()[name = tensor("op_1470_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1470_end_0 = const()[name = tensor("op_1470_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1470_end_mask_0 = const()[name = tensor("op_1470_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1470_cast_fp16 = slice_by_index(begin = var_1470_begin_0, end = var_1470_end_0, end_mask = var_1470_end_mask_0, x = var_1428_cast_fp16)[name = tensor("op_1470_cast_fp16")]; tensor var_1471_begin_0 = const()[name = tensor("op_1471_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1471_end_0 = const()[name = tensor("op_1471_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1471_end_mask_0 = const()[name = tensor("op_1471_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1471_cast_fp16 = slice_by_index(begin = var_1471_begin_0, end = var_1471_end_0, end_mask = var_1471_end_mask_0, x = var_1428_cast_fp16)[name = tensor("op_1471_cast_fp16")]; tensor var_1472_begin_0 = const()[name = tensor("op_1472_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1472_end_0 = const()[name = tensor("op_1472_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1472_end_mask_0 = const()[name = tensor("op_1472_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1472_cast_fp16 = slice_by_index(begin = var_1472_begin_0, end = var_1472_end_0, end_mask = var_1472_end_mask_0, x = var_1428_cast_fp16)[name = tensor("op_1472_cast_fp16")]; tensor var_1473_begin_0 = const()[name = tensor("op_1473_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1473_end_0 = const()[name = tensor("op_1473_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1473_end_mask_0 = const()[name = tensor("op_1473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1473_cast_fp16 = slice_by_index(begin = var_1473_begin_0, end = var_1473_end_0, end_mask = var_1473_end_mask_0, x = var_1432_cast_fp16)[name = tensor("op_1473_cast_fp16")]; tensor var_1474_begin_0 = const()[name = tensor("op_1474_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1474_end_0 = const()[name = tensor("op_1474_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1474_end_mask_0 = const()[name = tensor("op_1474_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1474_cast_fp16 = slice_by_index(begin = var_1474_begin_0, end = var_1474_end_0, end_mask = var_1474_end_mask_0, x = var_1432_cast_fp16)[name = tensor("op_1474_cast_fp16")]; tensor var_1475_begin_0 = const()[name = tensor("op_1475_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1475_end_0 = const()[name = tensor("op_1475_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1475_end_mask_0 = const()[name = tensor("op_1475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1475_cast_fp16 = slice_by_index(begin = var_1475_begin_0, end = var_1475_end_0, end_mask = var_1475_end_mask_0, x = var_1432_cast_fp16)[name = tensor("op_1475_cast_fp16")]; tensor var_1476_begin_0 = const()[name = tensor("op_1476_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1476_end_0 = const()[name = tensor("op_1476_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1476_end_mask_0 = const()[name = tensor("op_1476_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1476_cast_fp16 = slice_by_index(begin = var_1476_begin_0, end = var_1476_end_0, end_mask = var_1476_end_mask_0, x = var_1432_cast_fp16)[name = tensor("op_1476_cast_fp16")]; tensor var_1477_begin_0 = const()[name = tensor("op_1477_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1477_end_0 = const()[name = tensor("op_1477_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1477_end_mask_0 = const()[name = tensor("op_1477_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1477_cast_fp16 = slice_by_index(begin = var_1477_begin_0, end = var_1477_end_0, end_mask = var_1477_end_mask_0, x = var_1432_cast_fp16)[name = tensor("op_1477_cast_fp16")]; tensor var_1478_begin_0 = const()[name = tensor("op_1478_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1478_end_0 = const()[name = tensor("op_1478_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1478_end_mask_0 = const()[name = tensor("op_1478_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1478_cast_fp16 = slice_by_index(begin = var_1478_begin_0, end = var_1478_end_0, end_mask = var_1478_end_mask_0, x = var_1432_cast_fp16)[name = tensor("op_1478_cast_fp16")]; tensor var_1479_begin_0 = const()[name = tensor("op_1479_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1479_end_0 = const()[name = tensor("op_1479_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1479_end_mask_0 = const()[name = tensor("op_1479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1479_cast_fp16 = slice_by_index(begin = var_1479_begin_0, end = var_1479_end_0, end_mask = var_1479_end_mask_0, x = var_1436_cast_fp16)[name = tensor("op_1479_cast_fp16")]; tensor var_1480_begin_0 = const()[name = tensor("op_1480_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1480_end_0 = const()[name = tensor("op_1480_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1480_end_mask_0 = const()[name = tensor("op_1480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1480_cast_fp16 = slice_by_index(begin = var_1480_begin_0, end = var_1480_end_0, end_mask = var_1480_end_mask_0, x = var_1436_cast_fp16)[name = tensor("op_1480_cast_fp16")]; tensor var_1481_begin_0 = const()[name = tensor("op_1481_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1481_end_0 = const()[name = tensor("op_1481_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1481_end_mask_0 = const()[name = tensor("op_1481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1481_cast_fp16 = slice_by_index(begin = var_1481_begin_0, end = var_1481_end_0, end_mask = var_1481_end_mask_0, x = var_1436_cast_fp16)[name = tensor("op_1481_cast_fp16")]; tensor var_1482_begin_0 = const()[name = tensor("op_1482_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1482_end_0 = const()[name = tensor("op_1482_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1482_end_mask_0 = const()[name = tensor("op_1482_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1482_cast_fp16 = slice_by_index(begin = var_1482_begin_0, end = var_1482_end_0, end_mask = var_1482_end_mask_0, x = var_1436_cast_fp16)[name = tensor("op_1482_cast_fp16")]; tensor var_1483_begin_0 = const()[name = tensor("op_1483_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1483_end_0 = const()[name = tensor("op_1483_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1483_end_mask_0 = const()[name = tensor("op_1483_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1483_cast_fp16 = slice_by_index(begin = var_1483_begin_0, end = var_1483_end_0, end_mask = var_1483_end_mask_0, x = var_1436_cast_fp16)[name = tensor("op_1483_cast_fp16")]; tensor var_1484_begin_0 = const()[name = tensor("op_1484_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1484_end_0 = const()[name = tensor("op_1484_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1484_end_mask_0 = const()[name = tensor("op_1484_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1484_cast_fp16 = slice_by_index(begin = var_1484_begin_0, end = var_1484_end_0, end_mask = var_1484_end_mask_0, x = var_1436_cast_fp16)[name = tensor("op_1484_cast_fp16")]; tensor var_1485_begin_0 = const()[name = tensor("op_1485_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1485_end_0 = const()[name = tensor("op_1485_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1485_end_mask_0 = const()[name = tensor("op_1485_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1485_cast_fp16 = slice_by_index(begin = var_1485_begin_0, end = var_1485_end_0, end_mask = var_1485_end_mask_0, x = var_1440_cast_fp16)[name = tensor("op_1485_cast_fp16")]; tensor var_1486_begin_0 = const()[name = tensor("op_1486_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1486_end_0 = const()[name = tensor("op_1486_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1486_end_mask_0 = const()[name = tensor("op_1486_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, x = var_1440_cast_fp16)[name = tensor("op_1486_cast_fp16")]; tensor var_1487_begin_0 = const()[name = tensor("op_1487_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1487_end_0 = const()[name = tensor("op_1487_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1487_end_mask_0 = const()[name = tensor("op_1487_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1487_cast_fp16 = slice_by_index(begin = var_1487_begin_0, end = var_1487_end_0, end_mask = var_1487_end_mask_0, x = var_1440_cast_fp16)[name = tensor("op_1487_cast_fp16")]; tensor var_1488_begin_0 = const()[name = tensor("op_1488_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1488_end_0 = const()[name = tensor("op_1488_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1488_end_mask_0 = const()[name = tensor("op_1488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1488_cast_fp16 = slice_by_index(begin = var_1488_begin_0, end = var_1488_end_0, end_mask = var_1488_end_mask_0, x = var_1440_cast_fp16)[name = tensor("op_1488_cast_fp16")]; tensor var_1489_begin_0 = const()[name = tensor("op_1489_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1489_end_0 = const()[name = tensor("op_1489_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1489_end_mask_0 = const()[name = tensor("op_1489_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1489_cast_fp16 = slice_by_index(begin = var_1489_begin_0, end = var_1489_end_0, end_mask = var_1489_end_mask_0, x = var_1440_cast_fp16)[name = tensor("op_1489_cast_fp16")]; tensor var_1490_begin_0 = const()[name = tensor("op_1490_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1490_end_0 = const()[name = tensor("op_1490_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1490_end_mask_0 = const()[name = tensor("op_1490_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1490_cast_fp16 = slice_by_index(begin = var_1490_begin_0, end = var_1490_end_0, end_mask = var_1490_end_mask_0, x = var_1440_cast_fp16)[name = tensor("op_1490_cast_fp16")]; tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1495_begin_0 = const()[name = tensor("op_1495_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1495_end_0 = const()[name = tensor("op_1495_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_1495_end_mask_0 = const()[name = tensor("op_1495_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_3")]; tensor var_1495_cast_fp16 = slice_by_index(begin = var_1495_begin_0, end = var_1495_end_0, end_mask = var_1495_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1495_cast_fp16")]; tensor var_1499_begin_0 = const()[name = tensor("op_1499_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_1499_end_0 = const()[name = tensor("op_1499_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_1499_end_mask_0 = const()[name = tensor("op_1499_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1499_cast_fp16 = slice_by_index(begin = var_1499_begin_0, end = var_1499_end_0, end_mask = var_1499_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1499_cast_fp16")]; tensor var_1503_begin_0 = const()[name = tensor("op_1503_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_1503_end_0 = const()[name = tensor("op_1503_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_1503_end_mask_0 = const()[name = tensor("op_1503_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1503_cast_fp16 = slice_by_index(begin = var_1503_begin_0, end = var_1503_end_0, end_mask = var_1503_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1503_cast_fp16")]; tensor var_1507_begin_0 = const()[name = tensor("op_1507_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_1507_end_0 = const()[name = tensor("op_1507_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_1507_end_mask_0 = const()[name = tensor("op_1507_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1507_cast_fp16 = slice_by_index(begin = var_1507_begin_0, end = var_1507_end_0, end_mask = var_1507_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1507_cast_fp16")]; tensor var_1511_begin_0 = const()[name = tensor("op_1511_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1511_end_0 = const()[name = tensor("op_1511_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_1511_end_mask_0 = const()[name = tensor("op_1511_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1511_cast_fp16 = slice_by_index(begin = var_1511_begin_0, end = var_1511_end_0, end_mask = var_1511_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1511_cast_fp16")]; tensor var_1515_begin_0 = const()[name = tensor("op_1515_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_1515_end_0 = const()[name = tensor("op_1515_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_1515_end_mask_0 = const()[name = tensor("op_1515_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1515_cast_fp16 = slice_by_index(begin = var_1515_begin_0, end = var_1515_end_0, end_mask = var_1515_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1515_cast_fp16")]; tensor var_1519_begin_0 = const()[name = tensor("op_1519_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_1519_end_0 = const()[name = tensor("op_1519_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_1519_end_mask_0 = const()[name = tensor("op_1519_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1519_cast_fp16 = slice_by_index(begin = var_1519_begin_0, end = var_1519_end_0, end_mask = var_1519_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1519_cast_fp16")]; tensor var_1523_begin_0 = const()[name = tensor("op_1523_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_1523_end_0 = const()[name = tensor("op_1523_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_1523_end_mask_0 = const()[name = tensor("op_1523_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1523_cast_fp16 = slice_by_index(begin = var_1523_begin_0, end = var_1523_end_0, end_mask = var_1523_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1523_cast_fp16")]; tensor var_1525_begin_0 = const()[name = tensor("op_1525_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1525_end_0 = const()[name = tensor("op_1525_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1525_end_mask_0 = const()[name = tensor("op_1525_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1525_cast_fp16 = slice_by_index(begin = var_1525_begin_0, end = var_1525_end_0, end_mask = var_1525_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1525_cast_fp16")]; tensor var_1529_begin_0 = const()[name = tensor("op_1529_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1529_end_0 = const()[name = tensor("op_1529_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1529_end_mask_0 = const()[name = tensor("op_1529_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1529_cast_fp16 = slice_by_index(begin = var_1529_begin_0, end = var_1529_end_0, end_mask = var_1529_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1529_cast_fp16")]; tensor var_1533_begin_0 = const()[name = tensor("op_1533_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1533_end_0 = const()[name = tensor("op_1533_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1533_end_mask_0 = const()[name = tensor("op_1533_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1533_cast_fp16 = slice_by_index(begin = var_1533_begin_0, end = var_1533_end_0, end_mask = var_1533_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1533_cast_fp16")]; tensor var_1537_begin_0 = const()[name = tensor("op_1537_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1537_end_0 = const()[name = tensor("op_1537_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1537_end_mask_0 = const()[name = tensor("op_1537_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1537_cast_fp16 = slice_by_index(begin = var_1537_begin_0, end = var_1537_end_0, end_mask = var_1537_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1537_cast_fp16")]; tensor var_1541_begin_0 = const()[name = tensor("op_1541_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1541_end_0 = const()[name = tensor("op_1541_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1541_end_mask_0 = const()[name = tensor("op_1541_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1541_cast_fp16 = slice_by_index(begin = var_1541_begin_0, end = var_1541_end_0, end_mask = var_1541_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1541_cast_fp16")]; tensor var_1545_begin_0 = const()[name = tensor("op_1545_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1545_end_0 = const()[name = tensor("op_1545_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1545_end_mask_0 = const()[name = tensor("op_1545_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1545_cast_fp16 = slice_by_index(begin = var_1545_begin_0, end = var_1545_end_0, end_mask = var_1545_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1545_cast_fp16")]; tensor var_1549_begin_0 = const()[name = tensor("op_1549_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1549_end_0 = const()[name = tensor("op_1549_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1549_end_mask_0 = const()[name = tensor("op_1549_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = var_1549_end_0, end_mask = var_1549_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1549_cast_fp16")]; tensor var_1553_begin_0 = const()[name = tensor("op_1553_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1553_end_0 = const()[name = tensor("op_1553_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1553_end_mask_0 = const()[name = tensor("op_1553_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1553_cast_fp16 = slice_by_index(begin = var_1553_begin_0, end = var_1553_end_0, end_mask = var_1553_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_193_equation_0, values = (var_1495_cast_fp16, var_1443_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_195_equation_0, values = (var_1495_cast_fp16, var_1444_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_197_equation_0, values = (var_1495_cast_fp16, var_1445_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_199_equation_0, values = (var_1495_cast_fp16, var_1446_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_201_equation_0, values = (var_1495_cast_fp16, var_1447_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_203_equation_0, values = (var_1495_cast_fp16, var_1448_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_205_equation_0, values = (var_1499_cast_fp16, var_1449_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_207_equation_0, values = (var_1499_cast_fp16, var_1450_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_209_equation_0, values = (var_1499_cast_fp16, var_1451_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_211_equation_0, values = (var_1499_cast_fp16, var_1452_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_213_equation_0, values = (var_1499_cast_fp16, var_1453_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_215_equation_0, values = (var_1499_cast_fp16, var_1454_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_217_equation_0, values = (var_1503_cast_fp16, var_1455_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_219_equation_0, values = (var_1503_cast_fp16, var_1456_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_221_equation_0, values = (var_1503_cast_fp16, var_1457_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_223_equation_0, values = (var_1503_cast_fp16, var_1458_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_225_equation_0, values = (var_1503_cast_fp16, var_1459_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_227_equation_0, values = (var_1503_cast_fp16, var_1460_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_229_equation_0, values = (var_1507_cast_fp16, var_1461_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_231_equation_0, values = (var_1507_cast_fp16, var_1462_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_233_equation_0, values = (var_1507_cast_fp16, var_1463_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_235_equation_0, values = (var_1507_cast_fp16, var_1464_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_237_equation_0, values = (var_1507_cast_fp16, var_1465_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_239_equation_0, values = (var_1507_cast_fp16, var_1466_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_241_equation_0, values = (var_1511_cast_fp16, var_1467_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_243_equation_0, values = (var_1511_cast_fp16, var_1468_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_245_equation_0, values = (var_1511_cast_fp16, var_1469_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_247_equation_0, values = (var_1511_cast_fp16, var_1470_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_249_equation_0, values = (var_1511_cast_fp16, var_1471_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_251_equation_0, values = (var_1511_cast_fp16, var_1472_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_253_equation_0, values = (var_1515_cast_fp16, var_1473_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_255_equation_0, values = (var_1515_cast_fp16, var_1474_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_257_equation_0, values = (var_1515_cast_fp16, var_1475_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_259_equation_0, values = (var_1515_cast_fp16, var_1476_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_261_equation_0, values = (var_1515_cast_fp16, var_1477_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_263_equation_0, values = (var_1515_cast_fp16, var_1478_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_265_equation_0, values = (var_1519_cast_fp16, var_1479_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_267_equation_0, values = (var_1519_cast_fp16, var_1480_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_269_equation_0, values = (var_1519_cast_fp16, var_1481_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_271_equation_0, values = (var_1519_cast_fp16, var_1482_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_273_equation_0, values = (var_1519_cast_fp16, var_1483_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_275_equation_0, values = (var_1519_cast_fp16, var_1484_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_277_equation_0, values = (var_1523_cast_fp16, var_1485_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_279_equation_0, values = (var_1523_cast_fp16, var_1486_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_281_equation_0, values = (var_1523_cast_fp16, var_1487_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_283_equation_0, values = (var_1523_cast_fp16, var_1488_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_285_equation_0, values = (var_1523_cast_fp16, var_1489_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_287_equation_0, values = (var_1523_cast_fp16, var_1490_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_287_cast_fp16")]; tensor var_1652_to_fp16 = const()[name = tensor("op_1652_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_193_cast_fp16, y = var_1652_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; tensor var_1654_to_fp16 = const()[name = tensor("op_1654_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_195_cast_fp16, y = var_1654_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; tensor var_1656_to_fp16 = const()[name = tensor("op_1656_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_197_cast_fp16, y = var_1656_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; tensor var_1658_to_fp16 = const()[name = tensor("op_1658_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_199_cast_fp16, y = var_1658_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; tensor var_1660_to_fp16 = const()[name = tensor("op_1660_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_201_cast_fp16, y = var_1660_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; tensor var_1662_to_fp16 = const()[name = tensor("op_1662_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_203_cast_fp16, y = var_1662_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; tensor var_1664_to_fp16 = const()[name = tensor("op_1664_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_205_cast_fp16, y = var_1664_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; tensor var_1666_to_fp16 = const()[name = tensor("op_1666_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_207_cast_fp16, y = var_1666_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; tensor var_1668_to_fp16 = const()[name = tensor("op_1668_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_209_cast_fp16, y = var_1668_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; tensor var_1670_to_fp16 = const()[name = tensor("op_1670_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_211_cast_fp16, y = var_1670_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; tensor var_1672_to_fp16 = const()[name = tensor("op_1672_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_213_cast_fp16, y = var_1672_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; tensor var_1674_to_fp16 = const()[name = tensor("op_1674_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_215_cast_fp16, y = var_1674_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; tensor var_1676_to_fp16 = const()[name = tensor("op_1676_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_217_cast_fp16, y = var_1676_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; tensor var_1678_to_fp16 = const()[name = tensor("op_1678_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_219_cast_fp16, y = var_1678_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; tensor var_1680_to_fp16 = const()[name = tensor("op_1680_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_221_cast_fp16, y = var_1680_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; tensor var_1682_to_fp16 = const()[name = tensor("op_1682_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_223_cast_fp16, y = var_1682_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; tensor var_1684_to_fp16 = const()[name = tensor("op_1684_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_225_cast_fp16, y = var_1684_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; tensor var_1686_to_fp16 = const()[name = tensor("op_1686_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_227_cast_fp16, y = var_1686_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; tensor var_1688_to_fp16 = const()[name = tensor("op_1688_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_229_cast_fp16, y = var_1688_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; tensor var_1690_to_fp16 = const()[name = tensor("op_1690_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_231_cast_fp16, y = var_1690_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; tensor var_1692_to_fp16 = const()[name = tensor("op_1692_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_233_cast_fp16, y = var_1692_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; tensor var_1694_to_fp16 = const()[name = tensor("op_1694_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_235_cast_fp16, y = var_1694_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; tensor var_1696_to_fp16 = const()[name = tensor("op_1696_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_237_cast_fp16, y = var_1696_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; tensor var_1698_to_fp16 = const()[name = tensor("op_1698_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_239_cast_fp16, y = var_1698_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; tensor var_1700_to_fp16 = const()[name = tensor("op_1700_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_241_cast_fp16, y = var_1700_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; tensor var_1702_to_fp16 = const()[name = tensor("op_1702_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_243_cast_fp16, y = var_1702_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; tensor var_1704_to_fp16 = const()[name = tensor("op_1704_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_245_cast_fp16, y = var_1704_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; tensor var_1706_to_fp16 = const()[name = tensor("op_1706_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_247_cast_fp16, y = var_1706_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; tensor var_1708_to_fp16 = const()[name = tensor("op_1708_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_249_cast_fp16, y = var_1708_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; tensor var_1710_to_fp16 = const()[name = tensor("op_1710_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_251_cast_fp16, y = var_1710_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; tensor var_1712_to_fp16 = const()[name = tensor("op_1712_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_253_cast_fp16, y = var_1712_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; tensor var_1714_to_fp16 = const()[name = tensor("op_1714_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_255_cast_fp16, y = var_1714_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; tensor var_1716_to_fp16 = const()[name = tensor("op_1716_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_257_cast_fp16, y = var_1716_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; tensor var_1718_to_fp16 = const()[name = tensor("op_1718_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_259_cast_fp16, y = var_1718_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; tensor var_1720_to_fp16 = const()[name = tensor("op_1720_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_261_cast_fp16, y = var_1720_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; tensor var_1722_to_fp16 = const()[name = tensor("op_1722_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_263_cast_fp16, y = var_1722_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; tensor var_1724_to_fp16 = const()[name = tensor("op_1724_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_265_cast_fp16, y = var_1724_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; tensor var_1726_to_fp16 = const()[name = tensor("op_1726_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_267_cast_fp16, y = var_1726_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; tensor var_1728_to_fp16 = const()[name = tensor("op_1728_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_269_cast_fp16, y = var_1728_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; tensor var_1730_to_fp16 = const()[name = tensor("op_1730_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_271_cast_fp16, y = var_1730_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; tensor var_1732_to_fp16 = const()[name = tensor("op_1732_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_273_cast_fp16, y = var_1732_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; tensor var_1734_to_fp16 = const()[name = tensor("op_1734_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_275_cast_fp16, y = var_1734_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; tensor var_1736_to_fp16 = const()[name = tensor("op_1736_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_277_cast_fp16, y = var_1736_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; tensor var_1738_to_fp16 = const()[name = tensor("op_1738_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_279_cast_fp16, y = var_1738_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; tensor var_1740_to_fp16 = const()[name = tensor("op_1740_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_281_cast_fp16, y = var_1740_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; tensor var_1742_to_fp16 = const()[name = tensor("op_1742_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_283_cast_fp16, y = var_1742_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; tensor var_1744_to_fp16 = const()[name = tensor("op_1744_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_285_cast_fp16, y = var_1744_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; tensor var_1746_to_fp16 = const()[name = tensor("op_1746_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_287_cast_fp16, y = var_1746_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; tensor var_1748_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_193_cast_fp16)[name = tensor("op_1748_cast_fp16")]; tensor var_1749_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_195_cast_fp16)[name = tensor("op_1749_cast_fp16")]; tensor var_1750_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_197_cast_fp16)[name = tensor("op_1750_cast_fp16")]; tensor var_1751_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_199_cast_fp16)[name = tensor("op_1751_cast_fp16")]; tensor var_1752_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_201_cast_fp16)[name = tensor("op_1752_cast_fp16")]; tensor var_1753_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_203_cast_fp16)[name = tensor("op_1753_cast_fp16")]; tensor var_1754_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_205_cast_fp16)[name = tensor("op_1754_cast_fp16")]; tensor var_1755_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_207_cast_fp16)[name = tensor("op_1755_cast_fp16")]; tensor var_1756_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_209_cast_fp16)[name = tensor("op_1756_cast_fp16")]; tensor var_1757_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_211_cast_fp16)[name = tensor("op_1757_cast_fp16")]; tensor var_1758_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_213_cast_fp16)[name = tensor("op_1758_cast_fp16")]; tensor var_1759_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_215_cast_fp16)[name = tensor("op_1759_cast_fp16")]; tensor var_1760_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_217_cast_fp16)[name = tensor("op_1760_cast_fp16")]; tensor var_1761_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_219_cast_fp16)[name = tensor("op_1761_cast_fp16")]; tensor var_1762_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_221_cast_fp16)[name = tensor("op_1762_cast_fp16")]; tensor var_1763_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_223_cast_fp16)[name = tensor("op_1763_cast_fp16")]; tensor var_1764_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_225_cast_fp16)[name = tensor("op_1764_cast_fp16")]; tensor var_1765_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_227_cast_fp16)[name = tensor("op_1765_cast_fp16")]; tensor var_1766_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_229_cast_fp16)[name = tensor("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_231_cast_fp16)[name = tensor("op_1767_cast_fp16")]; tensor var_1768_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_233_cast_fp16)[name = tensor("op_1768_cast_fp16")]; tensor var_1769_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_235_cast_fp16)[name = tensor("op_1769_cast_fp16")]; tensor var_1770_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_237_cast_fp16)[name = tensor("op_1770_cast_fp16")]; tensor var_1771_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_239_cast_fp16)[name = tensor("op_1771_cast_fp16")]; tensor var_1772_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_241_cast_fp16)[name = tensor("op_1772_cast_fp16")]; tensor var_1773_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_243_cast_fp16)[name = tensor("op_1773_cast_fp16")]; tensor var_1774_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_245_cast_fp16)[name = tensor("op_1774_cast_fp16")]; tensor var_1775_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_247_cast_fp16)[name = tensor("op_1775_cast_fp16")]; tensor var_1776_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_249_cast_fp16)[name = tensor("op_1776_cast_fp16")]; tensor var_1777_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_251_cast_fp16)[name = tensor("op_1777_cast_fp16")]; tensor var_1778_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_253_cast_fp16)[name = tensor("op_1778_cast_fp16")]; tensor var_1779_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_255_cast_fp16)[name = tensor("op_1779_cast_fp16")]; tensor var_1780_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_257_cast_fp16)[name = tensor("op_1780_cast_fp16")]; tensor var_1781_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_259_cast_fp16)[name = tensor("op_1781_cast_fp16")]; tensor var_1782_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_261_cast_fp16)[name = tensor("op_1782_cast_fp16")]; tensor var_1783_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_263_cast_fp16)[name = tensor("op_1783_cast_fp16")]; tensor var_1784_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_265_cast_fp16)[name = tensor("op_1784_cast_fp16")]; tensor var_1785_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_267_cast_fp16)[name = tensor("op_1785_cast_fp16")]; tensor var_1786_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_269_cast_fp16)[name = tensor("op_1786_cast_fp16")]; tensor var_1787_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_271_cast_fp16)[name = tensor("op_1787_cast_fp16")]; tensor var_1788_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_273_cast_fp16)[name = tensor("op_1788_cast_fp16")]; tensor var_1789_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_275_cast_fp16)[name = tensor("op_1789_cast_fp16")]; tensor var_1790_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_277_cast_fp16)[name = tensor("op_1790_cast_fp16")]; tensor var_1791_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_279_cast_fp16)[name = tensor("op_1791_cast_fp16")]; tensor var_1792_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_281_cast_fp16)[name = tensor("op_1792_cast_fp16")]; tensor var_1793_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_283_cast_fp16)[name = tensor("op_1793_cast_fp16")]; tensor var_1794_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_285_cast_fp16)[name = tensor("op_1794_cast_fp16")]; tensor var_1795_cast_fp16 = softmax(axis = var_1360, x = aw_chunk_287_cast_fp16)[name = tensor("op_1795_cast_fp16")]; tensor var_1797_equation_0 = const()[name = tensor("op_1797_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1797_cast_fp16 = einsum(equation = var_1797_equation_0, values = (var_1525_cast_fp16, var_1748_cast_fp16))[name = tensor("op_1797_cast_fp16")]; tensor var_1799_equation_0 = const()[name = tensor("op_1799_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1799_cast_fp16 = einsum(equation = var_1799_equation_0, values = (var_1525_cast_fp16, var_1749_cast_fp16))[name = tensor("op_1799_cast_fp16")]; tensor var_1801_equation_0 = const()[name = tensor("op_1801_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1801_cast_fp16 = einsum(equation = var_1801_equation_0, values = (var_1525_cast_fp16, var_1750_cast_fp16))[name = tensor("op_1801_cast_fp16")]; tensor var_1803_equation_0 = const()[name = tensor("op_1803_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1803_cast_fp16 = einsum(equation = var_1803_equation_0, values = (var_1525_cast_fp16, var_1751_cast_fp16))[name = tensor("op_1803_cast_fp16")]; tensor var_1805_equation_0 = const()[name = tensor("op_1805_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1805_cast_fp16 = einsum(equation = var_1805_equation_0, values = (var_1525_cast_fp16, var_1752_cast_fp16))[name = tensor("op_1805_cast_fp16")]; tensor var_1807_equation_0 = const()[name = tensor("op_1807_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1807_cast_fp16 = einsum(equation = var_1807_equation_0, values = (var_1525_cast_fp16, var_1753_cast_fp16))[name = tensor("op_1807_cast_fp16")]; tensor var_1809_equation_0 = const()[name = tensor("op_1809_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1809_cast_fp16 = einsum(equation = var_1809_equation_0, values = (var_1529_cast_fp16, var_1754_cast_fp16))[name = tensor("op_1809_cast_fp16")]; tensor var_1811_equation_0 = const()[name = tensor("op_1811_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1811_cast_fp16 = einsum(equation = var_1811_equation_0, values = (var_1529_cast_fp16, var_1755_cast_fp16))[name = tensor("op_1811_cast_fp16")]; tensor var_1813_equation_0 = const()[name = tensor("op_1813_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1813_cast_fp16 = einsum(equation = var_1813_equation_0, values = (var_1529_cast_fp16, var_1756_cast_fp16))[name = tensor("op_1813_cast_fp16")]; tensor var_1815_equation_0 = const()[name = tensor("op_1815_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1815_cast_fp16 = einsum(equation = var_1815_equation_0, values = (var_1529_cast_fp16, var_1757_cast_fp16))[name = tensor("op_1815_cast_fp16")]; tensor var_1817_equation_0 = const()[name = tensor("op_1817_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1817_cast_fp16 = einsum(equation = var_1817_equation_0, values = (var_1529_cast_fp16, var_1758_cast_fp16))[name = tensor("op_1817_cast_fp16")]; tensor var_1819_equation_0 = const()[name = tensor("op_1819_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1819_cast_fp16 = einsum(equation = var_1819_equation_0, values = (var_1529_cast_fp16, var_1759_cast_fp16))[name = tensor("op_1819_cast_fp16")]; tensor var_1821_equation_0 = const()[name = tensor("op_1821_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1821_cast_fp16 = einsum(equation = var_1821_equation_0, values = (var_1533_cast_fp16, var_1760_cast_fp16))[name = tensor("op_1821_cast_fp16")]; tensor var_1823_equation_0 = const()[name = tensor("op_1823_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1823_cast_fp16 = einsum(equation = var_1823_equation_0, values = (var_1533_cast_fp16, var_1761_cast_fp16))[name = tensor("op_1823_cast_fp16")]; tensor var_1825_equation_0 = const()[name = tensor("op_1825_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1825_cast_fp16 = einsum(equation = var_1825_equation_0, values = (var_1533_cast_fp16, var_1762_cast_fp16))[name = tensor("op_1825_cast_fp16")]; tensor var_1827_equation_0 = const()[name = tensor("op_1827_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1827_cast_fp16 = einsum(equation = var_1827_equation_0, values = (var_1533_cast_fp16, var_1763_cast_fp16))[name = tensor("op_1827_cast_fp16")]; tensor var_1829_equation_0 = const()[name = tensor("op_1829_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1829_cast_fp16 = einsum(equation = var_1829_equation_0, values = (var_1533_cast_fp16, var_1764_cast_fp16))[name = tensor("op_1829_cast_fp16")]; tensor var_1831_equation_0 = const()[name = tensor("op_1831_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1831_cast_fp16 = einsum(equation = var_1831_equation_0, values = (var_1533_cast_fp16, var_1765_cast_fp16))[name = tensor("op_1831_cast_fp16")]; tensor var_1833_equation_0 = const()[name = tensor("op_1833_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1833_cast_fp16 = einsum(equation = var_1833_equation_0, values = (var_1537_cast_fp16, var_1766_cast_fp16))[name = tensor("op_1833_cast_fp16")]; tensor var_1835_equation_0 = const()[name = tensor("op_1835_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1835_cast_fp16 = einsum(equation = var_1835_equation_0, values = (var_1537_cast_fp16, var_1767_cast_fp16))[name = tensor("op_1835_cast_fp16")]; tensor var_1837_equation_0 = const()[name = tensor("op_1837_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1837_cast_fp16 = einsum(equation = var_1837_equation_0, values = (var_1537_cast_fp16, var_1768_cast_fp16))[name = tensor("op_1837_cast_fp16")]; tensor var_1839_equation_0 = const()[name = tensor("op_1839_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1839_cast_fp16 = einsum(equation = var_1839_equation_0, values = (var_1537_cast_fp16, var_1769_cast_fp16))[name = tensor("op_1839_cast_fp16")]; tensor var_1841_equation_0 = const()[name = tensor("op_1841_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1841_cast_fp16 = einsum(equation = var_1841_equation_0, values = (var_1537_cast_fp16, var_1770_cast_fp16))[name = tensor("op_1841_cast_fp16")]; tensor var_1843_equation_0 = const()[name = tensor("op_1843_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1843_cast_fp16 = einsum(equation = var_1843_equation_0, values = (var_1537_cast_fp16, var_1771_cast_fp16))[name = tensor("op_1843_cast_fp16")]; tensor var_1845_equation_0 = const()[name = tensor("op_1845_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1845_cast_fp16 = einsum(equation = var_1845_equation_0, values = (var_1541_cast_fp16, var_1772_cast_fp16))[name = tensor("op_1845_cast_fp16")]; tensor var_1847_equation_0 = const()[name = tensor("op_1847_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1847_cast_fp16 = einsum(equation = var_1847_equation_0, values = (var_1541_cast_fp16, var_1773_cast_fp16))[name = tensor("op_1847_cast_fp16")]; tensor var_1849_equation_0 = const()[name = tensor("op_1849_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1849_cast_fp16 = einsum(equation = var_1849_equation_0, values = (var_1541_cast_fp16, var_1774_cast_fp16))[name = tensor("op_1849_cast_fp16")]; tensor var_1851_equation_0 = const()[name = tensor("op_1851_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1851_cast_fp16 = einsum(equation = var_1851_equation_0, values = (var_1541_cast_fp16, var_1775_cast_fp16))[name = tensor("op_1851_cast_fp16")]; tensor var_1853_equation_0 = const()[name = tensor("op_1853_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1853_cast_fp16 = einsum(equation = var_1853_equation_0, values = (var_1541_cast_fp16, var_1776_cast_fp16))[name = tensor("op_1853_cast_fp16")]; tensor var_1855_equation_0 = const()[name = tensor("op_1855_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1855_cast_fp16 = einsum(equation = var_1855_equation_0, values = (var_1541_cast_fp16, var_1777_cast_fp16))[name = tensor("op_1855_cast_fp16")]; tensor var_1857_equation_0 = const()[name = tensor("op_1857_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1857_cast_fp16 = einsum(equation = var_1857_equation_0, values = (var_1545_cast_fp16, var_1778_cast_fp16))[name = tensor("op_1857_cast_fp16")]; tensor var_1859_equation_0 = const()[name = tensor("op_1859_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1859_cast_fp16 = einsum(equation = var_1859_equation_0, values = (var_1545_cast_fp16, var_1779_cast_fp16))[name = tensor("op_1859_cast_fp16")]; tensor var_1861_equation_0 = const()[name = tensor("op_1861_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1861_cast_fp16 = einsum(equation = var_1861_equation_0, values = (var_1545_cast_fp16, var_1780_cast_fp16))[name = tensor("op_1861_cast_fp16")]; tensor var_1863_equation_0 = const()[name = tensor("op_1863_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1863_cast_fp16 = einsum(equation = var_1863_equation_0, values = (var_1545_cast_fp16, var_1781_cast_fp16))[name = tensor("op_1863_cast_fp16")]; tensor var_1865_equation_0 = const()[name = tensor("op_1865_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1865_cast_fp16 = einsum(equation = var_1865_equation_0, values = (var_1545_cast_fp16, var_1782_cast_fp16))[name = tensor("op_1865_cast_fp16")]; tensor var_1867_equation_0 = const()[name = tensor("op_1867_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1867_cast_fp16 = einsum(equation = var_1867_equation_0, values = (var_1545_cast_fp16, var_1783_cast_fp16))[name = tensor("op_1867_cast_fp16")]; tensor var_1869_equation_0 = const()[name = tensor("op_1869_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1869_cast_fp16 = einsum(equation = var_1869_equation_0, values = (var_1549_cast_fp16, var_1784_cast_fp16))[name = tensor("op_1869_cast_fp16")]; tensor var_1871_equation_0 = const()[name = tensor("op_1871_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1871_cast_fp16 = einsum(equation = var_1871_equation_0, values = (var_1549_cast_fp16, var_1785_cast_fp16))[name = tensor("op_1871_cast_fp16")]; tensor var_1873_equation_0 = const()[name = tensor("op_1873_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1873_cast_fp16 = einsum(equation = var_1873_equation_0, values = (var_1549_cast_fp16, var_1786_cast_fp16))[name = tensor("op_1873_cast_fp16")]; tensor var_1875_equation_0 = const()[name = tensor("op_1875_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1875_cast_fp16 = einsum(equation = var_1875_equation_0, values = (var_1549_cast_fp16, var_1787_cast_fp16))[name = tensor("op_1875_cast_fp16")]; tensor var_1877_equation_0 = const()[name = tensor("op_1877_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1877_cast_fp16 = einsum(equation = var_1877_equation_0, values = (var_1549_cast_fp16, var_1788_cast_fp16))[name = tensor("op_1877_cast_fp16")]; tensor var_1879_equation_0 = const()[name = tensor("op_1879_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1879_cast_fp16 = einsum(equation = var_1879_equation_0, values = (var_1549_cast_fp16, var_1789_cast_fp16))[name = tensor("op_1879_cast_fp16")]; tensor var_1881_equation_0 = const()[name = tensor("op_1881_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1881_cast_fp16 = einsum(equation = var_1881_equation_0, values = (var_1553_cast_fp16, var_1790_cast_fp16))[name = tensor("op_1881_cast_fp16")]; tensor var_1883_equation_0 = const()[name = tensor("op_1883_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1883_cast_fp16 = einsum(equation = var_1883_equation_0, values = (var_1553_cast_fp16, var_1791_cast_fp16))[name = tensor("op_1883_cast_fp16")]; tensor var_1885_equation_0 = const()[name = tensor("op_1885_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1885_cast_fp16 = einsum(equation = var_1885_equation_0, values = (var_1553_cast_fp16, var_1792_cast_fp16))[name = tensor("op_1885_cast_fp16")]; tensor var_1887_equation_0 = const()[name = tensor("op_1887_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1887_cast_fp16 = einsum(equation = var_1887_equation_0, values = (var_1553_cast_fp16, var_1793_cast_fp16))[name = tensor("op_1887_cast_fp16")]; tensor var_1889_equation_0 = const()[name = tensor("op_1889_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1889_cast_fp16 = einsum(equation = var_1889_equation_0, values = (var_1553_cast_fp16, var_1794_cast_fp16))[name = tensor("op_1889_cast_fp16")]; tensor var_1891_equation_0 = const()[name = tensor("op_1891_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1891_cast_fp16 = einsum(equation = var_1891_equation_0, values = (var_1553_cast_fp16, var_1795_cast_fp16))[name = tensor("op_1891_cast_fp16")]; tensor var_1893_interleave_0 = const()[name = tensor("op_1893_interleave_0"), val = tensor(false)]; tensor var_1893_cast_fp16 = concat(axis = var_1347, interleave = var_1893_interleave_0, values = (var_1797_cast_fp16, var_1799_cast_fp16, var_1801_cast_fp16, var_1803_cast_fp16, var_1805_cast_fp16, var_1807_cast_fp16))[name = tensor("op_1893_cast_fp16")]; tensor var_1895_interleave_0 = const()[name = tensor("op_1895_interleave_0"), val = tensor(false)]; tensor var_1895_cast_fp16 = concat(axis = var_1347, interleave = var_1895_interleave_0, values = (var_1809_cast_fp16, var_1811_cast_fp16, var_1813_cast_fp16, var_1815_cast_fp16, var_1817_cast_fp16, var_1819_cast_fp16))[name = tensor("op_1895_cast_fp16")]; tensor var_1897_interleave_0 = const()[name = tensor("op_1897_interleave_0"), val = tensor(false)]; tensor var_1897_cast_fp16 = concat(axis = var_1347, interleave = var_1897_interleave_0, values = (var_1821_cast_fp16, var_1823_cast_fp16, var_1825_cast_fp16, var_1827_cast_fp16, var_1829_cast_fp16, var_1831_cast_fp16))[name = tensor("op_1897_cast_fp16")]; tensor var_1899_interleave_0 = const()[name = tensor("op_1899_interleave_0"), val = tensor(false)]; tensor var_1899_cast_fp16 = concat(axis = var_1347, interleave = var_1899_interleave_0, values = (var_1833_cast_fp16, var_1835_cast_fp16, var_1837_cast_fp16, var_1839_cast_fp16, var_1841_cast_fp16, var_1843_cast_fp16))[name = tensor("op_1899_cast_fp16")]; tensor var_1901_interleave_0 = const()[name = tensor("op_1901_interleave_0"), val = tensor(false)]; tensor var_1901_cast_fp16 = concat(axis = var_1347, interleave = var_1901_interleave_0, values = (var_1845_cast_fp16, var_1847_cast_fp16, var_1849_cast_fp16, var_1851_cast_fp16, var_1853_cast_fp16, var_1855_cast_fp16))[name = tensor("op_1901_cast_fp16")]; tensor var_1903_interleave_0 = const()[name = tensor("op_1903_interleave_0"), val = tensor(false)]; tensor var_1903_cast_fp16 = concat(axis = var_1347, interleave = var_1903_interleave_0, values = (var_1857_cast_fp16, var_1859_cast_fp16, var_1861_cast_fp16, var_1863_cast_fp16, var_1865_cast_fp16, var_1867_cast_fp16))[name = tensor("op_1903_cast_fp16")]; tensor var_1905_interleave_0 = const()[name = tensor("op_1905_interleave_0"), val = tensor(false)]; tensor var_1905_cast_fp16 = concat(axis = var_1347, interleave = var_1905_interleave_0, values = (var_1869_cast_fp16, var_1871_cast_fp16, var_1873_cast_fp16, var_1875_cast_fp16, var_1877_cast_fp16, var_1879_cast_fp16))[name = tensor("op_1905_cast_fp16")]; tensor var_1907_interleave_0 = const()[name = tensor("op_1907_interleave_0"), val = tensor(false)]; tensor var_1907_cast_fp16 = concat(axis = var_1347, interleave = var_1907_interleave_0, values = (var_1881_cast_fp16, var_1883_cast_fp16, var_1885_cast_fp16, var_1887_cast_fp16, var_1889_cast_fp16, var_1891_cast_fp16))[name = tensor("op_1907_cast_fp16")]; tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; tensor input_17_cast_fp16 = concat(axis = var_1360, interleave = input_17_interleave_0, values = (var_1893_cast_fp16, var_1895_cast_fp16, var_1897_cast_fp16, var_1899_cast_fp16, var_1901_cast_fp16, var_1903_cast_fp16, var_1905_cast_fp16, var_1907_cast_fp16))[name = tensor("input_17_cast_fp16")]; tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("valid")]; tensor obj_11_strides_0 = const()[name = tensor("obj_11_strides_0"), val = tensor([1, 1])]; tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_11_dilations_0 = const()[name = tensor("obj_11_dilations_0"), val = tensor([1, 1])]; tensor obj_11_groups_0 = const()[name = tensor("obj_11_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17546048)))]; tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18070400)))]; tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; tensor var_1926_to_fp16 = const()[name = tensor("op_1926_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_1926_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18071488)))]; tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18072576)))]; tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("valid")]; tensor input_21_strides_0 = const()[name = tensor("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = tensor("input_21_dilations_0"), val = tensor([1, 1])]; tensor input_21_groups_0 = const()[name = tensor("input_21_groups_0"), val = tensor(1)]; tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18073664)))]; tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20170880)))]; tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("valid")]; tensor hidden_states_9_strides_0 = const()[name = tensor("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = tensor("hidden_states_9_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_9_groups_0 = const()[name = tensor("hidden_states_9_groups_0"), val = tensor(1)]; tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20175040)))]; tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22272256)))]; tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; tensor var_1958 = const()[name = tensor("op_1958"), val = tensor(3)]; tensor var_1971 = const()[name = tensor("op_1971"), val = tensor(1)]; tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; tensor var_1988_to_fp16 = const()[name = tensor("op_1988_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_1988_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22273344)))]; tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22274432)))]; tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("valid")]; tensor query_7_strides_0 = const()[name = tensor("query_7_strides_0"), val = tensor([1, 1])]; tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_7_dilations_0 = const()[name = tensor("query_7_dilations_0"), val = tensor([1, 1])]; tensor query_7_groups_0 = const()[name = tensor("query_7_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22275520)))]; tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22799872)))]; tensor query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_7_cast_fp16")]; tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("valid")]; tensor key_7_strides_0 = const()[name = tensor("key_7_strides_0"), val = tensor([1, 1])]; tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_7_dilations_0 = const()[name = tensor("key_7_dilations_0"), val = tensor([1, 1])]; tensor key_7_groups_0 = const()[name = tensor("key_7_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22800960)))]; tensor key_7_cast_fp16 = conv(dilations = key_7_dilations_0, groups = key_7_groups_0, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = key_7_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_7_cast_fp16")]; tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("valid")]; tensor value_7_strides_0 = const()[name = tensor("value_7_strides_0"), val = tensor([1, 1])]; tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_7_dilations_0 = const()[name = tensor("value_7_dilations_0"), val = tensor([1, 1])]; tensor value_7_groups_0 = const()[name = tensor("value_7_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23325312)))]; tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23849664)))]; tensor value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = value_7_dilations_0, groups = value_7_groups_0, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_7_cast_fp16")]; tensor var_2023_begin_0 = const()[name = tensor("op_2023_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2023_end_0 = const()[name = tensor("op_2023_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2023_end_mask_0 = const()[name = tensor("op_2023_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2023_cast_fp16 = slice_by_index(begin = var_2023_begin_0, end = var_2023_end_0, end_mask = var_2023_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2023_cast_fp16")]; tensor var_2027_begin_0 = const()[name = tensor("op_2027_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2027_end_0 = const()[name = tensor("op_2027_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2027_end_mask_0 = const()[name = tensor("op_2027_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2027_cast_fp16 = slice_by_index(begin = var_2027_begin_0, end = var_2027_end_0, end_mask = var_2027_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2027_cast_fp16")]; tensor var_2031_begin_0 = const()[name = tensor("op_2031_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2031_end_0 = const()[name = tensor("op_2031_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2031_end_mask_0 = const()[name = tensor("op_2031_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2031_cast_fp16 = slice_by_index(begin = var_2031_begin_0, end = var_2031_end_0, end_mask = var_2031_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2031_cast_fp16")]; tensor var_2035_begin_0 = const()[name = tensor("op_2035_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2035_end_0 = const()[name = tensor("op_2035_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2035_end_mask_0 = const()[name = tensor("op_2035_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2035_cast_fp16 = slice_by_index(begin = var_2035_begin_0, end = var_2035_end_0, end_mask = var_2035_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2035_cast_fp16")]; tensor var_2039_begin_0 = const()[name = tensor("op_2039_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2039_end_0 = const()[name = tensor("op_2039_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2039_end_mask_0 = const()[name = tensor("op_2039_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2039_cast_fp16 = slice_by_index(begin = var_2039_begin_0, end = var_2039_end_0, end_mask = var_2039_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2039_cast_fp16")]; tensor var_2043_begin_0 = const()[name = tensor("op_2043_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2043_end_0 = const()[name = tensor("op_2043_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2043_end_mask_0 = const()[name = tensor("op_2043_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2043_cast_fp16 = slice_by_index(begin = var_2043_begin_0, end = var_2043_end_0, end_mask = var_2043_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2043_cast_fp16")]; tensor var_2047_begin_0 = const()[name = tensor("op_2047_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2047_end_0 = const()[name = tensor("op_2047_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2047_end_mask_0 = const()[name = tensor("op_2047_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2047_cast_fp16 = slice_by_index(begin = var_2047_begin_0, end = var_2047_end_0, end_mask = var_2047_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2047_cast_fp16")]; tensor var_2051_begin_0 = const()[name = tensor("op_2051_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2051_end_0 = const()[name = tensor("op_2051_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_2051_end_mask_0 = const()[name = tensor("op_2051_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2051_cast_fp16 = slice_by_index(begin = var_2051_begin_0, end = var_2051_end_0, end_mask = var_2051_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2051_cast_fp16")]; tensor var_2054_begin_0 = const()[name = tensor("op_2054_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2054_end_0 = const()[name = tensor("op_2054_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2054_end_mask_0 = const()[name = tensor("op_2054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2054_cast_fp16 = slice_by_index(begin = var_2054_begin_0, end = var_2054_end_0, end_mask = var_2054_end_mask_0, x = var_2023_cast_fp16)[name = tensor("op_2054_cast_fp16")]; tensor var_2055_begin_0 = const()[name = tensor("op_2055_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2055_end_0 = const()[name = tensor("op_2055_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2055_end_mask_0 = const()[name = tensor("op_2055_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = var_2055_end_0, end_mask = var_2055_end_mask_0, x = var_2023_cast_fp16)[name = tensor("op_2055_cast_fp16")]; tensor var_2056_begin_0 = const()[name = tensor("op_2056_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2056_end_0 = const()[name = tensor("op_2056_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2056_end_mask_0 = const()[name = tensor("op_2056_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2056_cast_fp16 = slice_by_index(begin = var_2056_begin_0, end = var_2056_end_0, end_mask = var_2056_end_mask_0, x = var_2023_cast_fp16)[name = tensor("op_2056_cast_fp16")]; tensor var_2057_begin_0 = const()[name = tensor("op_2057_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2057_end_0 = const()[name = tensor("op_2057_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2057_end_mask_0 = const()[name = tensor("op_2057_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2057_cast_fp16 = slice_by_index(begin = var_2057_begin_0, end = var_2057_end_0, end_mask = var_2057_end_mask_0, x = var_2023_cast_fp16)[name = tensor("op_2057_cast_fp16")]; tensor var_2058_begin_0 = const()[name = tensor("op_2058_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2058_end_0 = const()[name = tensor("op_2058_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2058_end_mask_0 = const()[name = tensor("op_2058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2058_cast_fp16 = slice_by_index(begin = var_2058_begin_0, end = var_2058_end_0, end_mask = var_2058_end_mask_0, x = var_2023_cast_fp16)[name = tensor("op_2058_cast_fp16")]; tensor var_2059_begin_0 = const()[name = tensor("op_2059_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2059_end_0 = const()[name = tensor("op_2059_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2059_end_mask_0 = const()[name = tensor("op_2059_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2059_cast_fp16 = slice_by_index(begin = var_2059_begin_0, end = var_2059_end_0, end_mask = var_2059_end_mask_0, x = var_2023_cast_fp16)[name = tensor("op_2059_cast_fp16")]; tensor var_2060_begin_0 = const()[name = tensor("op_2060_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2060_end_0 = const()[name = tensor("op_2060_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2060_end_mask_0 = const()[name = tensor("op_2060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2060_cast_fp16 = slice_by_index(begin = var_2060_begin_0, end = var_2060_end_0, end_mask = var_2060_end_mask_0, x = var_2027_cast_fp16)[name = tensor("op_2060_cast_fp16")]; tensor var_2061_begin_0 = const()[name = tensor("op_2061_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2061_end_0 = const()[name = tensor("op_2061_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2061_end_mask_0 = const()[name = tensor("op_2061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2061_cast_fp16 = slice_by_index(begin = var_2061_begin_0, end = var_2061_end_0, end_mask = var_2061_end_mask_0, x = var_2027_cast_fp16)[name = tensor("op_2061_cast_fp16")]; tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = var_2027_cast_fp16)[name = tensor("op_2062_cast_fp16")]; tensor var_2063_begin_0 = const()[name = tensor("op_2063_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2063_end_0 = const()[name = tensor("op_2063_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2063_end_mask_0 = const()[name = tensor("op_2063_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2063_cast_fp16 = slice_by_index(begin = var_2063_begin_0, end = var_2063_end_0, end_mask = var_2063_end_mask_0, x = var_2027_cast_fp16)[name = tensor("op_2063_cast_fp16")]; tensor var_2064_begin_0 = const()[name = tensor("op_2064_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2064_end_0 = const()[name = tensor("op_2064_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2064_end_mask_0 = const()[name = tensor("op_2064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2064_cast_fp16 = slice_by_index(begin = var_2064_begin_0, end = var_2064_end_0, end_mask = var_2064_end_mask_0, x = var_2027_cast_fp16)[name = tensor("op_2064_cast_fp16")]; tensor var_2065_begin_0 = const()[name = tensor("op_2065_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2065_end_0 = const()[name = tensor("op_2065_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2065_end_mask_0 = const()[name = tensor("op_2065_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2065_cast_fp16 = slice_by_index(begin = var_2065_begin_0, end = var_2065_end_0, end_mask = var_2065_end_mask_0, x = var_2027_cast_fp16)[name = tensor("op_2065_cast_fp16")]; tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = var_2031_cast_fp16)[name = tensor("op_2066_cast_fp16")]; tensor var_2067_begin_0 = const()[name = tensor("op_2067_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2067_end_0 = const()[name = tensor("op_2067_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2067_end_mask_0 = const()[name = tensor("op_2067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2067_cast_fp16 = slice_by_index(begin = var_2067_begin_0, end = var_2067_end_0, end_mask = var_2067_end_mask_0, x = var_2031_cast_fp16)[name = tensor("op_2067_cast_fp16")]; tensor var_2068_begin_0 = const()[name = tensor("op_2068_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2068_end_0 = const()[name = tensor("op_2068_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2068_end_mask_0 = const()[name = tensor("op_2068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2068_cast_fp16 = slice_by_index(begin = var_2068_begin_0, end = var_2068_end_0, end_mask = var_2068_end_mask_0, x = var_2031_cast_fp16)[name = tensor("op_2068_cast_fp16")]; tensor var_2069_begin_0 = const()[name = tensor("op_2069_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2069_end_0 = const()[name = tensor("op_2069_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2069_end_mask_0 = const()[name = tensor("op_2069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2069_cast_fp16 = slice_by_index(begin = var_2069_begin_0, end = var_2069_end_0, end_mask = var_2069_end_mask_0, x = var_2031_cast_fp16)[name = tensor("op_2069_cast_fp16")]; tensor var_2070_begin_0 = const()[name = tensor("op_2070_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2070_end_0 = const()[name = tensor("op_2070_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2070_end_mask_0 = const()[name = tensor("op_2070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2070_cast_fp16 = slice_by_index(begin = var_2070_begin_0, end = var_2070_end_0, end_mask = var_2070_end_mask_0, x = var_2031_cast_fp16)[name = tensor("op_2070_cast_fp16")]; tensor var_2071_begin_0 = const()[name = tensor("op_2071_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2071_end_0 = const()[name = tensor("op_2071_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2071_end_mask_0 = const()[name = tensor("op_2071_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2071_cast_fp16 = slice_by_index(begin = var_2071_begin_0, end = var_2071_end_0, end_mask = var_2071_end_mask_0, x = var_2031_cast_fp16)[name = tensor("op_2071_cast_fp16")]; tensor var_2072_begin_0 = const()[name = tensor("op_2072_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2072_end_0 = const()[name = tensor("op_2072_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2072_end_mask_0 = const()[name = tensor("op_2072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2072_cast_fp16 = slice_by_index(begin = var_2072_begin_0, end = var_2072_end_0, end_mask = var_2072_end_mask_0, x = var_2035_cast_fp16)[name = tensor("op_2072_cast_fp16")]; tensor var_2073_begin_0 = const()[name = tensor("op_2073_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2073_end_0 = const()[name = tensor("op_2073_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2073_end_mask_0 = const()[name = tensor("op_2073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2073_cast_fp16 = slice_by_index(begin = var_2073_begin_0, end = var_2073_end_0, end_mask = var_2073_end_mask_0, x = var_2035_cast_fp16)[name = tensor("op_2073_cast_fp16")]; tensor var_2074_begin_0 = const()[name = tensor("op_2074_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2074_end_0 = const()[name = tensor("op_2074_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2074_end_mask_0 = const()[name = tensor("op_2074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2074_cast_fp16 = slice_by_index(begin = var_2074_begin_0, end = var_2074_end_0, end_mask = var_2074_end_mask_0, x = var_2035_cast_fp16)[name = tensor("op_2074_cast_fp16")]; tensor var_2075_begin_0 = const()[name = tensor("op_2075_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2075_end_0 = const()[name = tensor("op_2075_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2075_end_mask_0 = const()[name = tensor("op_2075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2075_cast_fp16 = slice_by_index(begin = var_2075_begin_0, end = var_2075_end_0, end_mask = var_2075_end_mask_0, x = var_2035_cast_fp16)[name = tensor("op_2075_cast_fp16")]; tensor var_2076_begin_0 = const()[name = tensor("op_2076_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2076_end_0 = const()[name = tensor("op_2076_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2076_end_mask_0 = const()[name = tensor("op_2076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2076_cast_fp16 = slice_by_index(begin = var_2076_begin_0, end = var_2076_end_0, end_mask = var_2076_end_mask_0, x = var_2035_cast_fp16)[name = tensor("op_2076_cast_fp16")]; tensor var_2077_begin_0 = const()[name = tensor("op_2077_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2077_end_0 = const()[name = tensor("op_2077_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2077_end_mask_0 = const()[name = tensor("op_2077_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2077_cast_fp16 = slice_by_index(begin = var_2077_begin_0, end = var_2077_end_0, end_mask = var_2077_end_mask_0, x = var_2035_cast_fp16)[name = tensor("op_2077_cast_fp16")]; tensor var_2078_begin_0 = const()[name = tensor("op_2078_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2078_end_0 = const()[name = tensor("op_2078_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2078_end_mask_0 = const()[name = tensor("op_2078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2078_cast_fp16 = slice_by_index(begin = var_2078_begin_0, end = var_2078_end_0, end_mask = var_2078_end_mask_0, x = var_2039_cast_fp16)[name = tensor("op_2078_cast_fp16")]; tensor var_2079_begin_0 = const()[name = tensor("op_2079_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2079_end_0 = const()[name = tensor("op_2079_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2079_end_mask_0 = const()[name = tensor("op_2079_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2079_cast_fp16 = slice_by_index(begin = var_2079_begin_0, end = var_2079_end_0, end_mask = var_2079_end_mask_0, x = var_2039_cast_fp16)[name = tensor("op_2079_cast_fp16")]; tensor var_2080_begin_0 = const()[name = tensor("op_2080_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2080_end_0 = const()[name = tensor("op_2080_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2080_end_mask_0 = const()[name = tensor("op_2080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2080_cast_fp16 = slice_by_index(begin = var_2080_begin_0, end = var_2080_end_0, end_mask = var_2080_end_mask_0, x = var_2039_cast_fp16)[name = tensor("op_2080_cast_fp16")]; tensor var_2081_begin_0 = const()[name = tensor("op_2081_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2081_end_0 = const()[name = tensor("op_2081_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2081_end_mask_0 = const()[name = tensor("op_2081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2081_cast_fp16 = slice_by_index(begin = var_2081_begin_0, end = var_2081_end_0, end_mask = var_2081_end_mask_0, x = var_2039_cast_fp16)[name = tensor("op_2081_cast_fp16")]; tensor var_2082_begin_0 = const()[name = tensor("op_2082_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2082_end_0 = const()[name = tensor("op_2082_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2082_end_mask_0 = const()[name = tensor("op_2082_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, x = var_2039_cast_fp16)[name = tensor("op_2082_cast_fp16")]; tensor var_2083_begin_0 = const()[name = tensor("op_2083_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2083_end_0 = const()[name = tensor("op_2083_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2083_end_mask_0 = const()[name = tensor("op_2083_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2083_cast_fp16 = slice_by_index(begin = var_2083_begin_0, end = var_2083_end_0, end_mask = var_2083_end_mask_0, x = var_2039_cast_fp16)[name = tensor("op_2083_cast_fp16")]; tensor var_2084_begin_0 = const()[name = tensor("op_2084_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2084_end_0 = const()[name = tensor("op_2084_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2084_end_mask_0 = const()[name = tensor("op_2084_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2084_cast_fp16 = slice_by_index(begin = var_2084_begin_0, end = var_2084_end_0, end_mask = var_2084_end_mask_0, x = var_2043_cast_fp16)[name = tensor("op_2084_cast_fp16")]; tensor var_2085_begin_0 = const()[name = tensor("op_2085_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2085_end_0 = const()[name = tensor("op_2085_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2085_end_mask_0 = const()[name = tensor("op_2085_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2085_cast_fp16 = slice_by_index(begin = var_2085_begin_0, end = var_2085_end_0, end_mask = var_2085_end_mask_0, x = var_2043_cast_fp16)[name = tensor("op_2085_cast_fp16")]; tensor var_2086_begin_0 = const()[name = tensor("op_2086_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2086_end_0 = const()[name = tensor("op_2086_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2086_end_mask_0 = const()[name = tensor("op_2086_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, x = var_2043_cast_fp16)[name = tensor("op_2086_cast_fp16")]; tensor var_2087_begin_0 = const()[name = tensor("op_2087_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2087_end_0 = const()[name = tensor("op_2087_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2087_end_mask_0 = const()[name = tensor("op_2087_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2087_cast_fp16 = slice_by_index(begin = var_2087_begin_0, end = var_2087_end_0, end_mask = var_2087_end_mask_0, x = var_2043_cast_fp16)[name = tensor("op_2087_cast_fp16")]; tensor var_2088_begin_0 = const()[name = tensor("op_2088_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2088_end_0 = const()[name = tensor("op_2088_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2088_end_mask_0 = const()[name = tensor("op_2088_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2088_cast_fp16 = slice_by_index(begin = var_2088_begin_0, end = var_2088_end_0, end_mask = var_2088_end_mask_0, x = var_2043_cast_fp16)[name = tensor("op_2088_cast_fp16")]; tensor var_2089_begin_0 = const()[name = tensor("op_2089_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2089_end_0 = const()[name = tensor("op_2089_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2089_end_mask_0 = const()[name = tensor("op_2089_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2089_cast_fp16 = slice_by_index(begin = var_2089_begin_0, end = var_2089_end_0, end_mask = var_2089_end_mask_0, x = var_2043_cast_fp16)[name = tensor("op_2089_cast_fp16")]; tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = var_2047_cast_fp16)[name = tensor("op_2090_cast_fp16")]; tensor var_2091_begin_0 = const()[name = tensor("op_2091_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2091_end_0 = const()[name = tensor("op_2091_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2091_end_mask_0 = const()[name = tensor("op_2091_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, x = var_2047_cast_fp16)[name = tensor("op_2091_cast_fp16")]; tensor var_2092_begin_0 = const()[name = tensor("op_2092_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2092_end_0 = const()[name = tensor("op_2092_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2092_end_mask_0 = const()[name = tensor("op_2092_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2092_cast_fp16 = slice_by_index(begin = var_2092_begin_0, end = var_2092_end_0, end_mask = var_2092_end_mask_0, x = var_2047_cast_fp16)[name = tensor("op_2092_cast_fp16")]; tensor var_2093_begin_0 = const()[name = tensor("op_2093_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2093_end_0 = const()[name = tensor("op_2093_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2093_end_mask_0 = const()[name = tensor("op_2093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2093_cast_fp16 = slice_by_index(begin = var_2093_begin_0, end = var_2093_end_0, end_mask = var_2093_end_mask_0, x = var_2047_cast_fp16)[name = tensor("op_2093_cast_fp16")]; tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = var_2047_cast_fp16)[name = tensor("op_2094_cast_fp16")]; tensor var_2095_begin_0 = const()[name = tensor("op_2095_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2095_end_0 = const()[name = tensor("op_2095_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2095_end_mask_0 = const()[name = tensor("op_2095_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2095_cast_fp16 = slice_by_index(begin = var_2095_begin_0, end = var_2095_end_0, end_mask = var_2095_end_mask_0, x = var_2047_cast_fp16)[name = tensor("op_2095_cast_fp16")]; tensor var_2096_begin_0 = const()[name = tensor("op_2096_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2096_end_0 = const()[name = tensor("op_2096_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2096_end_mask_0 = const()[name = tensor("op_2096_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2096_cast_fp16 = slice_by_index(begin = var_2096_begin_0, end = var_2096_end_0, end_mask = var_2096_end_mask_0, x = var_2051_cast_fp16)[name = tensor("op_2096_cast_fp16")]; tensor var_2097_begin_0 = const()[name = tensor("op_2097_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2097_end_0 = const()[name = tensor("op_2097_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2097_end_mask_0 = const()[name = tensor("op_2097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2097_cast_fp16 = slice_by_index(begin = var_2097_begin_0, end = var_2097_end_0, end_mask = var_2097_end_mask_0, x = var_2051_cast_fp16)[name = tensor("op_2097_cast_fp16")]; tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = var_2051_cast_fp16)[name = tensor("op_2098_cast_fp16")]; tensor var_2099_begin_0 = const()[name = tensor("op_2099_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2099_end_0 = const()[name = tensor("op_2099_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2099_end_mask_0 = const()[name = tensor("op_2099_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2099_cast_fp16 = slice_by_index(begin = var_2099_begin_0, end = var_2099_end_0, end_mask = var_2099_end_mask_0, x = var_2051_cast_fp16)[name = tensor("op_2099_cast_fp16")]; tensor var_2100_begin_0 = const()[name = tensor("op_2100_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2100_end_0 = const()[name = tensor("op_2100_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2100_end_mask_0 = const()[name = tensor("op_2100_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2100_cast_fp16 = slice_by_index(begin = var_2100_begin_0, end = var_2100_end_0, end_mask = var_2100_end_mask_0, x = var_2051_cast_fp16)[name = tensor("op_2100_cast_fp16")]; tensor var_2101_begin_0 = const()[name = tensor("op_2101_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2101_end_0 = const()[name = tensor("op_2101_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2101_end_mask_0 = const()[name = tensor("op_2101_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = var_2051_cast_fp16)[name = tensor("op_2101_cast_fp16")]; tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor("transpose_2")]; tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2106_cast_fp16")]; tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2110_cast_fp16")]; tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2114_cast_fp16")]; tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2118_cast_fp16")]; tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2122_cast_fp16")]; tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2126_cast_fp16")]; tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2130_cast_fp16")]; tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2134_cast_fp16")]; tensor var_2136_begin_0 = const()[name = tensor("op_2136_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2136_end_0 = const()[name = tensor("op_2136_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2136_end_mask_0 = const()[name = tensor("op_2136_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2136_cast_fp16 = slice_by_index(begin = var_2136_begin_0, end = var_2136_end_0, end_mask = var_2136_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2136_cast_fp16")]; tensor var_2140_begin_0 = const()[name = tensor("op_2140_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2140_end_0 = const()[name = tensor("op_2140_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2140_end_mask_0 = const()[name = tensor("op_2140_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2140_cast_fp16 = slice_by_index(begin = var_2140_begin_0, end = var_2140_end_0, end_mask = var_2140_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2140_cast_fp16")]; tensor var_2144_begin_0 = const()[name = tensor("op_2144_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2144_end_0 = const()[name = tensor("op_2144_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2144_end_mask_0 = const()[name = tensor("op_2144_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2144_cast_fp16 = slice_by_index(begin = var_2144_begin_0, end = var_2144_end_0, end_mask = var_2144_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2144_cast_fp16")]; tensor var_2148_begin_0 = const()[name = tensor("op_2148_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2148_end_0 = const()[name = tensor("op_2148_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2148_end_mask_0 = const()[name = tensor("op_2148_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2148_cast_fp16 = slice_by_index(begin = var_2148_begin_0, end = var_2148_end_0, end_mask = var_2148_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2148_cast_fp16")]; tensor var_2152_begin_0 = const()[name = tensor("op_2152_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2152_end_0 = const()[name = tensor("op_2152_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2152_end_mask_0 = const()[name = tensor("op_2152_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2152_cast_fp16 = slice_by_index(begin = var_2152_begin_0, end = var_2152_end_0, end_mask = var_2152_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2152_cast_fp16")]; tensor var_2156_begin_0 = const()[name = tensor("op_2156_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2156_end_0 = const()[name = tensor("op_2156_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2156_end_mask_0 = const()[name = tensor("op_2156_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2156_cast_fp16 = slice_by_index(begin = var_2156_begin_0, end = var_2156_end_0, end_mask = var_2156_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2156_cast_fp16")]; tensor var_2160_begin_0 = const()[name = tensor("op_2160_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2160_end_0 = const()[name = tensor("op_2160_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2160_end_mask_0 = const()[name = tensor("op_2160_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2160_cast_fp16 = slice_by_index(begin = var_2160_begin_0, end = var_2160_end_0, end_mask = var_2160_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2160_cast_fp16")]; tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2164_cast_fp16")]; tensor _SplitHeadsQ__mh_w_289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_289_equation_0, values = (var_2106_cast_fp16, var_2054_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_291_equation_0, values = (var_2106_cast_fp16, var_2055_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_293_equation_0, values = (var_2106_cast_fp16, var_2056_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_295_equation_0, values = (var_2106_cast_fp16, var_2057_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_297_equation_0, values = (var_2106_cast_fp16, var_2058_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_299_equation_0, values = (var_2106_cast_fp16, var_2059_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_301_equation_0, values = (var_2110_cast_fp16, var_2060_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_303_equation_0, values = (var_2110_cast_fp16, var_2061_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_305_equation_0, values = (var_2110_cast_fp16, var_2062_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_307_equation_0, values = (var_2110_cast_fp16, var_2063_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_309_equation_0, values = (var_2110_cast_fp16, var_2064_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_311_equation_0, values = (var_2110_cast_fp16, var_2065_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_313_equation_0, values = (var_2114_cast_fp16, var_2066_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_315_equation_0, values = (var_2114_cast_fp16, var_2067_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_317_equation_0, values = (var_2114_cast_fp16, var_2068_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_319_equation_0, values = (var_2114_cast_fp16, var_2069_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_319_cast_fp16")]; tensor _SplitHeadsQ__mh_w_321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_321_equation_0, values = (var_2114_cast_fp16, var_2070_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_323_equation_0, values = (var_2114_cast_fp16, var_2071_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_325_equation_0, values = (var_2118_cast_fp16, var_2072_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_327_equation_0, values = (var_2118_cast_fp16, var_2073_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_329_equation_0, values = (var_2118_cast_fp16, var_2074_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_331_equation_0, values = (var_2118_cast_fp16, var_2075_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_333_equation_0, values = (var_2118_cast_fp16, var_2076_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_335_equation_0, values = (var_2118_cast_fp16, var_2077_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_337_equation_0, values = (var_2122_cast_fp16, var_2078_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_339_equation_0, values = (var_2122_cast_fp16, var_2079_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_341_equation_0, values = (var_2122_cast_fp16, var_2080_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_343_equation_0, values = (var_2122_cast_fp16, var_2081_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_345_equation_0, values = (var_2122_cast_fp16, var_2082_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_347_equation_0, values = (var_2122_cast_fp16, var_2083_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_349_equation_0, values = (var_2126_cast_fp16, var_2084_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_351_equation_0, values = (var_2126_cast_fp16, var_2085_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_353_equation_0, values = (var_2126_cast_fp16, var_2086_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_355_equation_0, values = (var_2126_cast_fp16, var_2087_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_357_equation_0, values = (var_2126_cast_fp16, var_2088_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_359_equation_0, values = (var_2126_cast_fp16, var_2089_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_361_equation_0, values = (var_2130_cast_fp16, var_2090_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_363_equation_0, values = (var_2130_cast_fp16, var_2091_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_365_equation_0, values = (var_2130_cast_fp16, var_2092_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_367_equation_0, values = (var_2130_cast_fp16, var_2093_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_369_equation_0, values = (var_2130_cast_fp16, var_2094_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_371_equation_0, values = (var_2130_cast_fp16, var_2095_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_373_equation_0, values = (var_2134_cast_fp16, var_2096_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_375_equation_0, values = (var_2134_cast_fp16, var_2097_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_377_equation_0, values = (var_2134_cast_fp16, var_2098_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_379_equation_0, values = (var_2134_cast_fp16, var_2099_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_381_equation_0, values = (var_2134_cast_fp16, var_2100_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_383_equation_0, values = (var_2134_cast_fp16, var_2101_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_383_cast_fp16")]; tensor var_2263_to_fp16 = const()[name = tensor("op_2263_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_289_cast_fp16, y = var_2263_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; tensor var_2265_to_fp16 = const()[name = tensor("op_2265_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_291_cast_fp16, y = var_2265_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; tensor var_2267_to_fp16 = const()[name = tensor("op_2267_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_293_cast_fp16, y = var_2267_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; tensor var_2269_to_fp16 = const()[name = tensor("op_2269_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_295_cast_fp16, y = var_2269_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; tensor var_2271_to_fp16 = const()[name = tensor("op_2271_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_297_cast_fp16, y = var_2271_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; tensor var_2273_to_fp16 = const()[name = tensor("op_2273_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_299_cast_fp16, y = var_2273_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; tensor var_2275_to_fp16 = const()[name = tensor("op_2275_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_301_cast_fp16, y = var_2275_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; tensor var_2277_to_fp16 = const()[name = tensor("op_2277_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_303_cast_fp16, y = var_2277_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; tensor var_2279_to_fp16 = const()[name = tensor("op_2279_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_305_cast_fp16, y = var_2279_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; tensor var_2281_to_fp16 = const()[name = tensor("op_2281_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_307_cast_fp16, y = var_2281_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; tensor var_2283_to_fp16 = const()[name = tensor("op_2283_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_309_cast_fp16, y = var_2283_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; tensor var_2285_to_fp16 = const()[name = tensor("op_2285_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_311_cast_fp16, y = var_2285_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; tensor var_2287_to_fp16 = const()[name = tensor("op_2287_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_313_cast_fp16, y = var_2287_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; tensor var_2289_to_fp16 = const()[name = tensor("op_2289_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_315_cast_fp16, y = var_2289_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; tensor var_2291_to_fp16 = const()[name = tensor("op_2291_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_317_cast_fp16, y = var_2291_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; tensor var_2293_to_fp16 = const()[name = tensor("op_2293_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_319_cast_fp16, y = var_2293_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; tensor var_2295_to_fp16 = const()[name = tensor("op_2295_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_321_cast_fp16, y = var_2295_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; tensor var_2297_to_fp16 = const()[name = tensor("op_2297_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_323_cast_fp16, y = var_2297_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; tensor var_2299_to_fp16 = const()[name = tensor("op_2299_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_325_cast_fp16, y = var_2299_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; tensor var_2301_to_fp16 = const()[name = tensor("op_2301_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_327_cast_fp16, y = var_2301_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; tensor var_2303_to_fp16 = const()[name = tensor("op_2303_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_329_cast_fp16, y = var_2303_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; tensor var_2305_to_fp16 = const()[name = tensor("op_2305_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_331_cast_fp16, y = var_2305_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; tensor var_2307_to_fp16 = const()[name = tensor("op_2307_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_333_cast_fp16, y = var_2307_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; tensor var_2309_to_fp16 = const()[name = tensor("op_2309_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_335_cast_fp16, y = var_2309_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; tensor var_2311_to_fp16 = const()[name = tensor("op_2311_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_337_cast_fp16, y = var_2311_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; tensor var_2313_to_fp16 = const()[name = tensor("op_2313_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_339_cast_fp16, y = var_2313_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; tensor var_2315_to_fp16 = const()[name = tensor("op_2315_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_341_cast_fp16, y = var_2315_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; tensor var_2317_to_fp16 = const()[name = tensor("op_2317_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_343_cast_fp16, y = var_2317_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; tensor var_2319_to_fp16 = const()[name = tensor("op_2319_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_345_cast_fp16, y = var_2319_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; tensor var_2321_to_fp16 = const()[name = tensor("op_2321_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_347_cast_fp16, y = var_2321_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; tensor var_2323_to_fp16 = const()[name = tensor("op_2323_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_349_cast_fp16, y = var_2323_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; tensor var_2325_to_fp16 = const()[name = tensor("op_2325_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_351_cast_fp16, y = var_2325_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; tensor var_2327_to_fp16 = const()[name = tensor("op_2327_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_353_cast_fp16, y = var_2327_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; tensor var_2329_to_fp16 = const()[name = tensor("op_2329_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_355_cast_fp16, y = var_2329_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; tensor var_2331_to_fp16 = const()[name = tensor("op_2331_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_357_cast_fp16, y = var_2331_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; tensor var_2333_to_fp16 = const()[name = tensor("op_2333_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_359_cast_fp16, y = var_2333_to_fp16)[name = tensor("aw_chunk_359_cast_fp16")]; tensor var_2335_to_fp16 = const()[name = tensor("op_2335_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_361_cast_fp16, y = var_2335_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; tensor var_2337_to_fp16 = const()[name = tensor("op_2337_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_363_cast_fp16, y = var_2337_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; tensor var_2339_to_fp16 = const()[name = tensor("op_2339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_365_cast_fp16, y = var_2339_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; tensor var_2341_to_fp16 = const()[name = tensor("op_2341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_367_cast_fp16, y = var_2341_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; tensor var_2343_to_fp16 = const()[name = tensor("op_2343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_369_cast_fp16, y = var_2343_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; tensor var_2345_to_fp16 = const()[name = tensor("op_2345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_371_cast_fp16, y = var_2345_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; tensor var_2347_to_fp16 = const()[name = tensor("op_2347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_373_cast_fp16, y = var_2347_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; tensor var_2349_to_fp16 = const()[name = tensor("op_2349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_375_cast_fp16, y = var_2349_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; tensor var_2351_to_fp16 = const()[name = tensor("op_2351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_377_cast_fp16, y = var_2351_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; tensor var_2353_to_fp16 = const()[name = tensor("op_2353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_379_cast_fp16, y = var_2353_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; tensor var_2355_to_fp16 = const()[name = tensor("op_2355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_381_cast_fp16, y = var_2355_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; tensor var_2357_to_fp16 = const()[name = tensor("op_2357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_383_cast_fp16, y = var_2357_to_fp16)[name = tensor("aw_chunk_383_cast_fp16")]; tensor var_2359_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_289_cast_fp16)[name = tensor("op_2359_cast_fp16")]; tensor var_2360_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_291_cast_fp16)[name = tensor("op_2360_cast_fp16")]; tensor var_2361_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_293_cast_fp16)[name = tensor("op_2361_cast_fp16")]; tensor var_2362_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_295_cast_fp16)[name = tensor("op_2362_cast_fp16")]; tensor var_2363_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_297_cast_fp16)[name = tensor("op_2363_cast_fp16")]; tensor var_2364_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_299_cast_fp16)[name = tensor("op_2364_cast_fp16")]; tensor var_2365_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_301_cast_fp16)[name = tensor("op_2365_cast_fp16")]; tensor var_2366_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_303_cast_fp16)[name = tensor("op_2366_cast_fp16")]; tensor var_2367_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_305_cast_fp16)[name = tensor("op_2367_cast_fp16")]; tensor var_2368_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_307_cast_fp16)[name = tensor("op_2368_cast_fp16")]; tensor var_2369_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_309_cast_fp16)[name = tensor("op_2369_cast_fp16")]; tensor var_2370_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_311_cast_fp16)[name = tensor("op_2370_cast_fp16")]; tensor var_2371_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_313_cast_fp16)[name = tensor("op_2371_cast_fp16")]; tensor var_2372_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_315_cast_fp16)[name = tensor("op_2372_cast_fp16")]; tensor var_2373_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_317_cast_fp16)[name = tensor("op_2373_cast_fp16")]; tensor var_2374_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_319_cast_fp16)[name = tensor("op_2374_cast_fp16")]; tensor var_2375_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_321_cast_fp16)[name = tensor("op_2375_cast_fp16")]; tensor var_2376_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_323_cast_fp16)[name = tensor("op_2376_cast_fp16")]; tensor var_2377_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_325_cast_fp16)[name = tensor("op_2377_cast_fp16")]; tensor var_2378_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_327_cast_fp16)[name = tensor("op_2378_cast_fp16")]; tensor var_2379_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_329_cast_fp16)[name = tensor("op_2379_cast_fp16")]; tensor var_2380_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_331_cast_fp16)[name = tensor("op_2380_cast_fp16")]; tensor var_2381_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_333_cast_fp16)[name = tensor("op_2381_cast_fp16")]; tensor var_2382_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_335_cast_fp16)[name = tensor("op_2382_cast_fp16")]; tensor var_2383_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_337_cast_fp16)[name = tensor("op_2383_cast_fp16")]; tensor var_2384_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_339_cast_fp16)[name = tensor("op_2384_cast_fp16")]; tensor var_2385_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_341_cast_fp16)[name = tensor("op_2385_cast_fp16")]; tensor var_2386_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_343_cast_fp16)[name = tensor("op_2386_cast_fp16")]; tensor var_2387_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_345_cast_fp16)[name = tensor("op_2387_cast_fp16")]; tensor var_2388_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_347_cast_fp16)[name = tensor("op_2388_cast_fp16")]; tensor var_2389_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_349_cast_fp16)[name = tensor("op_2389_cast_fp16")]; tensor var_2390_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_351_cast_fp16)[name = tensor("op_2390_cast_fp16")]; tensor var_2391_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_353_cast_fp16)[name = tensor("op_2391_cast_fp16")]; tensor var_2392_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_355_cast_fp16)[name = tensor("op_2392_cast_fp16")]; tensor var_2393_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_357_cast_fp16)[name = tensor("op_2393_cast_fp16")]; tensor var_2394_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_359_cast_fp16)[name = tensor("op_2394_cast_fp16")]; tensor var_2395_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_361_cast_fp16)[name = tensor("op_2395_cast_fp16")]; tensor var_2396_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_363_cast_fp16)[name = tensor("op_2396_cast_fp16")]; tensor var_2397_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_365_cast_fp16)[name = tensor("op_2397_cast_fp16")]; tensor var_2398_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_367_cast_fp16)[name = tensor("op_2398_cast_fp16")]; tensor var_2399_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_369_cast_fp16)[name = tensor("op_2399_cast_fp16")]; tensor var_2400_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_371_cast_fp16)[name = tensor("op_2400_cast_fp16")]; tensor var_2401_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_373_cast_fp16)[name = tensor("op_2401_cast_fp16")]; tensor var_2402_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_375_cast_fp16)[name = tensor("op_2402_cast_fp16")]; tensor var_2403_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_377_cast_fp16)[name = tensor("op_2403_cast_fp16")]; tensor var_2404_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_379_cast_fp16)[name = tensor("op_2404_cast_fp16")]; tensor var_2405_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_381_cast_fp16)[name = tensor("op_2405_cast_fp16")]; tensor var_2406_cast_fp16 = softmax(axis = var_1971, x = aw_chunk_383_cast_fp16)[name = tensor("op_2406_cast_fp16")]; tensor var_2408_equation_0 = const()[name = tensor("op_2408_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2408_cast_fp16 = einsum(equation = var_2408_equation_0, values = (var_2136_cast_fp16, var_2359_cast_fp16))[name = tensor("op_2408_cast_fp16")]; tensor var_2410_equation_0 = const()[name = tensor("op_2410_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2410_cast_fp16 = einsum(equation = var_2410_equation_0, values = (var_2136_cast_fp16, var_2360_cast_fp16))[name = tensor("op_2410_cast_fp16")]; tensor var_2412_equation_0 = const()[name = tensor("op_2412_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2412_cast_fp16 = einsum(equation = var_2412_equation_0, values = (var_2136_cast_fp16, var_2361_cast_fp16))[name = tensor("op_2412_cast_fp16")]; tensor var_2414_equation_0 = const()[name = tensor("op_2414_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2414_cast_fp16 = einsum(equation = var_2414_equation_0, values = (var_2136_cast_fp16, var_2362_cast_fp16))[name = tensor("op_2414_cast_fp16")]; tensor var_2416_equation_0 = const()[name = tensor("op_2416_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2416_cast_fp16 = einsum(equation = var_2416_equation_0, values = (var_2136_cast_fp16, var_2363_cast_fp16))[name = tensor("op_2416_cast_fp16")]; tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2136_cast_fp16, var_2364_cast_fp16))[name = tensor("op_2418_cast_fp16")]; tensor var_2420_equation_0 = const()[name = tensor("op_2420_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2420_cast_fp16 = einsum(equation = var_2420_equation_0, values = (var_2140_cast_fp16, var_2365_cast_fp16))[name = tensor("op_2420_cast_fp16")]; tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2140_cast_fp16, var_2366_cast_fp16))[name = tensor("op_2422_cast_fp16")]; tensor var_2424_equation_0 = const()[name = tensor("op_2424_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2424_cast_fp16 = einsum(equation = var_2424_equation_0, values = (var_2140_cast_fp16, var_2367_cast_fp16))[name = tensor("op_2424_cast_fp16")]; tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2426_cast_fp16 = einsum(equation = var_2426_equation_0, values = (var_2140_cast_fp16, var_2368_cast_fp16))[name = tensor("op_2426_cast_fp16")]; tensor var_2428_equation_0 = const()[name = tensor("op_2428_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2428_cast_fp16 = einsum(equation = var_2428_equation_0, values = (var_2140_cast_fp16, var_2369_cast_fp16))[name = tensor("op_2428_cast_fp16")]; tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2430_cast_fp16 = einsum(equation = var_2430_equation_0, values = (var_2140_cast_fp16, var_2370_cast_fp16))[name = tensor("op_2430_cast_fp16")]; tensor var_2432_equation_0 = const()[name = tensor("op_2432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2432_cast_fp16 = einsum(equation = var_2432_equation_0, values = (var_2144_cast_fp16, var_2371_cast_fp16))[name = tensor("op_2432_cast_fp16")]; tensor var_2434_equation_0 = const()[name = tensor("op_2434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2434_cast_fp16 = einsum(equation = var_2434_equation_0, values = (var_2144_cast_fp16, var_2372_cast_fp16))[name = tensor("op_2434_cast_fp16")]; tensor var_2436_equation_0 = const()[name = tensor("op_2436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2436_cast_fp16 = einsum(equation = var_2436_equation_0, values = (var_2144_cast_fp16, var_2373_cast_fp16))[name = tensor("op_2436_cast_fp16")]; tensor var_2438_equation_0 = const()[name = tensor("op_2438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2438_cast_fp16 = einsum(equation = var_2438_equation_0, values = (var_2144_cast_fp16, var_2374_cast_fp16))[name = tensor("op_2438_cast_fp16")]; tensor var_2440_equation_0 = const()[name = tensor("op_2440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2440_cast_fp16 = einsum(equation = var_2440_equation_0, values = (var_2144_cast_fp16, var_2375_cast_fp16))[name = tensor("op_2440_cast_fp16")]; tensor var_2442_equation_0 = const()[name = tensor("op_2442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2442_cast_fp16 = einsum(equation = var_2442_equation_0, values = (var_2144_cast_fp16, var_2376_cast_fp16))[name = tensor("op_2442_cast_fp16")]; tensor var_2444_equation_0 = const()[name = tensor("op_2444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2444_cast_fp16 = einsum(equation = var_2444_equation_0, values = (var_2148_cast_fp16, var_2377_cast_fp16))[name = tensor("op_2444_cast_fp16")]; tensor var_2446_equation_0 = const()[name = tensor("op_2446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2446_cast_fp16 = einsum(equation = var_2446_equation_0, values = (var_2148_cast_fp16, var_2378_cast_fp16))[name = tensor("op_2446_cast_fp16")]; tensor var_2448_equation_0 = const()[name = tensor("op_2448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2448_cast_fp16 = einsum(equation = var_2448_equation_0, values = (var_2148_cast_fp16, var_2379_cast_fp16))[name = tensor("op_2448_cast_fp16")]; tensor var_2450_equation_0 = const()[name = tensor("op_2450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2450_cast_fp16 = einsum(equation = var_2450_equation_0, values = (var_2148_cast_fp16, var_2380_cast_fp16))[name = tensor("op_2450_cast_fp16")]; tensor var_2452_equation_0 = const()[name = tensor("op_2452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2452_cast_fp16 = einsum(equation = var_2452_equation_0, values = (var_2148_cast_fp16, var_2381_cast_fp16))[name = tensor("op_2452_cast_fp16")]; tensor var_2454_equation_0 = const()[name = tensor("op_2454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2454_cast_fp16 = einsum(equation = var_2454_equation_0, values = (var_2148_cast_fp16, var_2382_cast_fp16))[name = tensor("op_2454_cast_fp16")]; tensor var_2456_equation_0 = const()[name = tensor("op_2456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2456_cast_fp16 = einsum(equation = var_2456_equation_0, values = (var_2152_cast_fp16, var_2383_cast_fp16))[name = tensor("op_2456_cast_fp16")]; tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2152_cast_fp16, var_2384_cast_fp16))[name = tensor("op_2458_cast_fp16")]; tensor var_2460_equation_0 = const()[name = tensor("op_2460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2460_cast_fp16 = einsum(equation = var_2460_equation_0, values = (var_2152_cast_fp16, var_2385_cast_fp16))[name = tensor("op_2460_cast_fp16")]; tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2152_cast_fp16, var_2386_cast_fp16))[name = tensor("op_2462_cast_fp16")]; tensor var_2464_equation_0 = const()[name = tensor("op_2464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2464_cast_fp16 = einsum(equation = var_2464_equation_0, values = (var_2152_cast_fp16, var_2387_cast_fp16))[name = tensor("op_2464_cast_fp16")]; tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2152_cast_fp16, var_2388_cast_fp16))[name = tensor("op_2466_cast_fp16")]; tensor var_2468_equation_0 = const()[name = tensor("op_2468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2156_cast_fp16, var_2389_cast_fp16))[name = tensor("op_2468_cast_fp16")]; tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2156_cast_fp16, var_2390_cast_fp16))[name = tensor("op_2470_cast_fp16")]; tensor var_2472_equation_0 = const()[name = tensor("op_2472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2156_cast_fp16, var_2391_cast_fp16))[name = tensor("op_2472_cast_fp16")]; tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2156_cast_fp16, var_2392_cast_fp16))[name = tensor("op_2474_cast_fp16")]; tensor var_2476_equation_0 = const()[name = tensor("op_2476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2156_cast_fp16, var_2393_cast_fp16))[name = tensor("op_2476_cast_fp16")]; tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2156_cast_fp16, var_2394_cast_fp16))[name = tensor("op_2478_cast_fp16")]; tensor var_2480_equation_0 = const()[name = tensor("op_2480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2160_cast_fp16, var_2395_cast_fp16))[name = tensor("op_2480_cast_fp16")]; tensor var_2482_equation_0 = const()[name = tensor("op_2482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2160_cast_fp16, var_2396_cast_fp16))[name = tensor("op_2482_cast_fp16")]; tensor var_2484_equation_0 = const()[name = tensor("op_2484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2484_cast_fp16 = einsum(equation = var_2484_equation_0, values = (var_2160_cast_fp16, var_2397_cast_fp16))[name = tensor("op_2484_cast_fp16")]; tensor var_2486_equation_0 = const()[name = tensor("op_2486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2160_cast_fp16, var_2398_cast_fp16))[name = tensor("op_2486_cast_fp16")]; tensor var_2488_equation_0 = const()[name = tensor("op_2488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2488_cast_fp16 = einsum(equation = var_2488_equation_0, values = (var_2160_cast_fp16, var_2399_cast_fp16))[name = tensor("op_2488_cast_fp16")]; tensor var_2490_equation_0 = const()[name = tensor("op_2490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2160_cast_fp16, var_2400_cast_fp16))[name = tensor("op_2490_cast_fp16")]; tensor var_2492_equation_0 = const()[name = tensor("op_2492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2492_cast_fp16 = einsum(equation = var_2492_equation_0, values = (var_2164_cast_fp16, var_2401_cast_fp16))[name = tensor("op_2492_cast_fp16")]; tensor var_2494_equation_0 = const()[name = tensor("op_2494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2494_cast_fp16 = einsum(equation = var_2494_equation_0, values = (var_2164_cast_fp16, var_2402_cast_fp16))[name = tensor("op_2494_cast_fp16")]; tensor var_2496_equation_0 = const()[name = tensor("op_2496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2496_cast_fp16 = einsum(equation = var_2496_equation_0, values = (var_2164_cast_fp16, var_2403_cast_fp16))[name = tensor("op_2496_cast_fp16")]; tensor var_2498_equation_0 = const()[name = tensor("op_2498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2498_cast_fp16 = einsum(equation = var_2498_equation_0, values = (var_2164_cast_fp16, var_2404_cast_fp16))[name = tensor("op_2498_cast_fp16")]; tensor var_2500_equation_0 = const()[name = tensor("op_2500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2500_cast_fp16 = einsum(equation = var_2500_equation_0, values = (var_2164_cast_fp16, var_2405_cast_fp16))[name = tensor("op_2500_cast_fp16")]; tensor var_2502_equation_0 = const()[name = tensor("op_2502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2502_cast_fp16 = einsum(equation = var_2502_equation_0, values = (var_2164_cast_fp16, var_2406_cast_fp16))[name = tensor("op_2502_cast_fp16")]; tensor var_2504_interleave_0 = const()[name = tensor("op_2504_interleave_0"), val = tensor(false)]; tensor var_2504_cast_fp16 = concat(axis = var_1958, interleave = var_2504_interleave_0, values = (var_2408_cast_fp16, var_2410_cast_fp16, var_2412_cast_fp16, var_2414_cast_fp16, var_2416_cast_fp16, var_2418_cast_fp16))[name = tensor("op_2504_cast_fp16")]; tensor var_2506_interleave_0 = const()[name = tensor("op_2506_interleave_0"), val = tensor(false)]; tensor var_2506_cast_fp16 = concat(axis = var_1958, interleave = var_2506_interleave_0, values = (var_2420_cast_fp16, var_2422_cast_fp16, var_2424_cast_fp16, var_2426_cast_fp16, var_2428_cast_fp16, var_2430_cast_fp16))[name = tensor("op_2506_cast_fp16")]; tensor var_2508_interleave_0 = const()[name = tensor("op_2508_interleave_0"), val = tensor(false)]; tensor var_2508_cast_fp16 = concat(axis = var_1958, interleave = var_2508_interleave_0, values = (var_2432_cast_fp16, var_2434_cast_fp16, var_2436_cast_fp16, var_2438_cast_fp16, var_2440_cast_fp16, var_2442_cast_fp16))[name = tensor("op_2508_cast_fp16")]; tensor var_2510_interleave_0 = const()[name = tensor("op_2510_interleave_0"), val = tensor(false)]; tensor var_2510_cast_fp16 = concat(axis = var_1958, interleave = var_2510_interleave_0, values = (var_2444_cast_fp16, var_2446_cast_fp16, var_2448_cast_fp16, var_2450_cast_fp16, var_2452_cast_fp16, var_2454_cast_fp16))[name = tensor("op_2510_cast_fp16")]; tensor var_2512_interleave_0 = const()[name = tensor("op_2512_interleave_0"), val = tensor(false)]; tensor var_2512_cast_fp16 = concat(axis = var_1958, interleave = var_2512_interleave_0, values = (var_2456_cast_fp16, var_2458_cast_fp16, var_2460_cast_fp16, var_2462_cast_fp16, var_2464_cast_fp16, var_2466_cast_fp16))[name = tensor("op_2512_cast_fp16")]; tensor var_2514_interleave_0 = const()[name = tensor("op_2514_interleave_0"), val = tensor(false)]; tensor var_2514_cast_fp16 = concat(axis = var_1958, interleave = var_2514_interleave_0, values = (var_2468_cast_fp16, var_2470_cast_fp16, var_2472_cast_fp16, var_2474_cast_fp16, var_2476_cast_fp16, var_2478_cast_fp16))[name = tensor("op_2514_cast_fp16")]; tensor var_2516_interleave_0 = const()[name = tensor("op_2516_interleave_0"), val = tensor(false)]; tensor var_2516_cast_fp16 = concat(axis = var_1958, interleave = var_2516_interleave_0, values = (var_2480_cast_fp16, var_2482_cast_fp16, var_2484_cast_fp16, var_2486_cast_fp16, var_2488_cast_fp16, var_2490_cast_fp16))[name = tensor("op_2516_cast_fp16")]; tensor var_2518_interleave_0 = const()[name = tensor("op_2518_interleave_0"), val = tensor(false)]; tensor var_2518_cast_fp16 = concat(axis = var_1958, interleave = var_2518_interleave_0, values = (var_2492_cast_fp16, var_2494_cast_fp16, var_2496_cast_fp16, var_2498_cast_fp16, var_2500_cast_fp16, var_2502_cast_fp16))[name = tensor("op_2518_cast_fp16")]; tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; tensor input_25_cast_fp16 = concat(axis = var_1971, interleave = input_25_interleave_0, values = (var_2504_cast_fp16, var_2506_cast_fp16, var_2508_cast_fp16, var_2510_cast_fp16, var_2512_cast_fp16, var_2514_cast_fp16, var_2516_cast_fp16, var_2518_cast_fp16))[name = tensor("input_25_cast_fp16")]; tensor obj_15_pad_type_0 = const()[name = tensor("obj_15_pad_type_0"), val = tensor("valid")]; tensor obj_15_strides_0 = const()[name = tensor("obj_15_strides_0"), val = tensor([1, 1])]; tensor obj_15_pad_0 = const()[name = tensor("obj_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_15_dilations_0 = const()[name = tensor("obj_15_dilations_0"), val = tensor([1, 1])]; tensor obj_15_groups_0 = const()[name = tensor("obj_15_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23850752)))]; tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24375104)))]; tensor obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_15_dilations_0, groups = obj_15_groups_0, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = obj_15_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_15_cast_fp16")]; tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; tensor var_2537_to_fp16 = const()[name = tensor("op_2537_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_2537_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24376192)))]; tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24377280)))]; tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("valid")]; tensor input_29_strides_0 = const()[name = tensor("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = tensor("input_29_dilations_0"), val = tensor([1, 1])]; tensor input_29_groups_0 = const()[name = tensor("input_29_groups_0"), val = tensor(1)]; tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24378368)))]; tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26475584)))]; tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("valid")]; tensor hidden_states_11_strides_0 = const()[name = tensor("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = tensor("hidden_states_11_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_11_groups_0 = const()[name = tensor("hidden_states_11_groups_0"), val = tensor(1)]; tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26479744)))]; tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28576960)))]; tensor hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; tensor var_2569 = const()[name = tensor("op_2569"), val = tensor(3)]; tensor var_2582 = const()[name = tensor("op_2582"), val = tensor(1)]; tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; tensor var_2599_to_fp16 = const()[name = tensor("op_2599_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_2599_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28578048)))]; tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28579136)))]; tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("valid")]; tensor query_9_strides_0 = const()[name = tensor("query_9_strides_0"), val = tensor([1, 1])]; tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_9_dilations_0 = const()[name = tensor("query_9_dilations_0"), val = tensor([1, 1])]; tensor query_9_groups_0 = const()[name = tensor("query_9_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28580224)))]; tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29104576)))]; tensor query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = query_9_dilations_0, groups = query_9_groups_0, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = query_9_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("query_9_cast_fp16")]; tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("valid")]; tensor key_9_strides_0 = const()[name = tensor("key_9_strides_0"), val = tensor([1, 1])]; tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_9_dilations_0 = const()[name = tensor("key_9_dilations_0"), val = tensor([1, 1])]; tensor key_9_groups_0 = const()[name = tensor("key_9_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29105664)))]; tensor key_9_cast_fp16 = conv(dilations = key_9_dilations_0, groups = key_9_groups_0, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = key_9_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("key_9_cast_fp16")]; tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("valid")]; tensor value_9_strides_0 = const()[name = tensor("value_9_strides_0"), val = tensor([1, 1])]; tensor value_9_pad_0 = const()[name = tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_9_dilations_0 = const()[name = tensor("value_9_dilations_0"), val = tensor([1, 1])]; tensor value_9_groups_0 = const()[name = tensor("value_9_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29630016)))]; tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30154368)))]; tensor value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = value_9_dilations_0, groups = value_9_groups_0, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = value_9_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("value_9_cast_fp16")]; tensor var_2634_begin_0 = const()[name = tensor("op_2634_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2634_end_0 = const()[name = tensor("op_2634_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2634_end_mask_0 = const()[name = tensor("op_2634_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2634_cast_fp16 = slice_by_index(begin = var_2634_begin_0, end = var_2634_end_0, end_mask = var_2634_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2634_cast_fp16")]; tensor var_2638_begin_0 = const()[name = tensor("op_2638_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2638_end_0 = const()[name = tensor("op_2638_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2638_end_mask_0 = const()[name = tensor("op_2638_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2638_cast_fp16 = slice_by_index(begin = var_2638_begin_0, end = var_2638_end_0, end_mask = var_2638_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2638_cast_fp16")]; tensor var_2642_begin_0 = const()[name = tensor("op_2642_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2642_end_0 = const()[name = tensor("op_2642_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2642_end_mask_0 = const()[name = tensor("op_2642_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2642_cast_fp16 = slice_by_index(begin = var_2642_begin_0, end = var_2642_end_0, end_mask = var_2642_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2642_cast_fp16")]; tensor var_2646_begin_0 = const()[name = tensor("op_2646_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2646_end_0 = const()[name = tensor("op_2646_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2646_end_mask_0 = const()[name = tensor("op_2646_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2646_cast_fp16 = slice_by_index(begin = var_2646_begin_0, end = var_2646_end_0, end_mask = var_2646_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2646_cast_fp16")]; tensor var_2650_begin_0 = const()[name = tensor("op_2650_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2650_end_0 = const()[name = tensor("op_2650_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2650_end_mask_0 = const()[name = tensor("op_2650_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2650_cast_fp16 = slice_by_index(begin = var_2650_begin_0, end = var_2650_end_0, end_mask = var_2650_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2650_cast_fp16")]; tensor var_2654_begin_0 = const()[name = tensor("op_2654_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2654_end_0 = const()[name = tensor("op_2654_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2654_end_mask_0 = const()[name = tensor("op_2654_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2654_cast_fp16 = slice_by_index(begin = var_2654_begin_0, end = var_2654_end_0, end_mask = var_2654_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2654_cast_fp16")]; tensor var_2658_begin_0 = const()[name = tensor("op_2658_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2658_end_0 = const()[name = tensor("op_2658_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2658_end_mask_0 = const()[name = tensor("op_2658_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2658_cast_fp16 = slice_by_index(begin = var_2658_begin_0, end = var_2658_end_0, end_mask = var_2658_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2658_cast_fp16")]; tensor var_2662_begin_0 = const()[name = tensor("op_2662_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2662_end_0 = const()[name = tensor("op_2662_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_2662_end_mask_0 = const()[name = tensor("op_2662_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2662_cast_fp16 = slice_by_index(begin = var_2662_begin_0, end = var_2662_end_0, end_mask = var_2662_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_2662_cast_fp16")]; tensor var_2665_begin_0 = const()[name = tensor("op_2665_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2665_end_0 = const()[name = tensor("op_2665_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2665_end_mask_0 = const()[name = tensor("op_2665_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2665_cast_fp16 = slice_by_index(begin = var_2665_begin_0, end = var_2665_end_0, end_mask = var_2665_end_mask_0, x = var_2634_cast_fp16)[name = tensor("op_2665_cast_fp16")]; tensor var_2666_begin_0 = const()[name = tensor("op_2666_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2666_end_0 = const()[name = tensor("op_2666_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2666_end_mask_0 = const()[name = tensor("op_2666_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2666_cast_fp16 = slice_by_index(begin = var_2666_begin_0, end = var_2666_end_0, end_mask = var_2666_end_mask_0, x = var_2634_cast_fp16)[name = tensor("op_2666_cast_fp16")]; tensor var_2667_begin_0 = const()[name = tensor("op_2667_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2667_end_0 = const()[name = tensor("op_2667_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2667_end_mask_0 = const()[name = tensor("op_2667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2667_cast_fp16 = slice_by_index(begin = var_2667_begin_0, end = var_2667_end_0, end_mask = var_2667_end_mask_0, x = var_2634_cast_fp16)[name = tensor("op_2667_cast_fp16")]; tensor var_2668_begin_0 = const()[name = tensor("op_2668_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2668_end_0 = const()[name = tensor("op_2668_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2668_end_mask_0 = const()[name = tensor("op_2668_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2668_cast_fp16 = slice_by_index(begin = var_2668_begin_0, end = var_2668_end_0, end_mask = var_2668_end_mask_0, x = var_2634_cast_fp16)[name = tensor("op_2668_cast_fp16")]; tensor var_2669_begin_0 = const()[name = tensor("op_2669_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2669_end_0 = const()[name = tensor("op_2669_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2669_end_mask_0 = const()[name = tensor("op_2669_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2669_cast_fp16 = slice_by_index(begin = var_2669_begin_0, end = var_2669_end_0, end_mask = var_2669_end_mask_0, x = var_2634_cast_fp16)[name = tensor("op_2669_cast_fp16")]; tensor var_2670_begin_0 = const()[name = tensor("op_2670_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2670_end_0 = const()[name = tensor("op_2670_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2670_end_mask_0 = const()[name = tensor("op_2670_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2670_cast_fp16 = slice_by_index(begin = var_2670_begin_0, end = var_2670_end_0, end_mask = var_2670_end_mask_0, x = var_2634_cast_fp16)[name = tensor("op_2670_cast_fp16")]; tensor var_2671_begin_0 = const()[name = tensor("op_2671_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2671_end_0 = const()[name = tensor("op_2671_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2671_end_mask_0 = const()[name = tensor("op_2671_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2671_cast_fp16 = slice_by_index(begin = var_2671_begin_0, end = var_2671_end_0, end_mask = var_2671_end_mask_0, x = var_2638_cast_fp16)[name = tensor("op_2671_cast_fp16")]; tensor var_2672_begin_0 = const()[name = tensor("op_2672_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2672_end_0 = const()[name = tensor("op_2672_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2672_end_mask_0 = const()[name = tensor("op_2672_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2672_cast_fp16 = slice_by_index(begin = var_2672_begin_0, end = var_2672_end_0, end_mask = var_2672_end_mask_0, x = var_2638_cast_fp16)[name = tensor("op_2672_cast_fp16")]; tensor var_2673_begin_0 = const()[name = tensor("op_2673_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2673_end_0 = const()[name = tensor("op_2673_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2673_end_mask_0 = const()[name = tensor("op_2673_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2673_cast_fp16 = slice_by_index(begin = var_2673_begin_0, end = var_2673_end_0, end_mask = var_2673_end_mask_0, x = var_2638_cast_fp16)[name = tensor("op_2673_cast_fp16")]; tensor var_2674_begin_0 = const()[name = tensor("op_2674_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2674_end_0 = const()[name = tensor("op_2674_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2674_end_mask_0 = const()[name = tensor("op_2674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2674_cast_fp16 = slice_by_index(begin = var_2674_begin_0, end = var_2674_end_0, end_mask = var_2674_end_mask_0, x = var_2638_cast_fp16)[name = tensor("op_2674_cast_fp16")]; tensor var_2675_begin_0 = const()[name = tensor("op_2675_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2675_end_0 = const()[name = tensor("op_2675_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2675_end_mask_0 = const()[name = tensor("op_2675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2675_cast_fp16 = slice_by_index(begin = var_2675_begin_0, end = var_2675_end_0, end_mask = var_2675_end_mask_0, x = var_2638_cast_fp16)[name = tensor("op_2675_cast_fp16")]; tensor var_2676_begin_0 = const()[name = tensor("op_2676_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2676_end_0 = const()[name = tensor("op_2676_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2676_end_mask_0 = const()[name = tensor("op_2676_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2676_cast_fp16 = slice_by_index(begin = var_2676_begin_0, end = var_2676_end_0, end_mask = var_2676_end_mask_0, x = var_2638_cast_fp16)[name = tensor("op_2676_cast_fp16")]; tensor var_2677_begin_0 = const()[name = tensor("op_2677_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2677_end_0 = const()[name = tensor("op_2677_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2677_end_mask_0 = const()[name = tensor("op_2677_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2677_cast_fp16 = slice_by_index(begin = var_2677_begin_0, end = var_2677_end_0, end_mask = var_2677_end_mask_0, x = var_2642_cast_fp16)[name = tensor("op_2677_cast_fp16")]; tensor var_2678_begin_0 = const()[name = tensor("op_2678_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2678_end_0 = const()[name = tensor("op_2678_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2678_end_mask_0 = const()[name = tensor("op_2678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2678_cast_fp16 = slice_by_index(begin = var_2678_begin_0, end = var_2678_end_0, end_mask = var_2678_end_mask_0, x = var_2642_cast_fp16)[name = tensor("op_2678_cast_fp16")]; tensor var_2679_begin_0 = const()[name = tensor("op_2679_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2679_end_0 = const()[name = tensor("op_2679_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2679_end_mask_0 = const()[name = tensor("op_2679_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2679_cast_fp16 = slice_by_index(begin = var_2679_begin_0, end = var_2679_end_0, end_mask = var_2679_end_mask_0, x = var_2642_cast_fp16)[name = tensor("op_2679_cast_fp16")]; tensor var_2680_begin_0 = const()[name = tensor("op_2680_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2680_end_0 = const()[name = tensor("op_2680_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2680_end_mask_0 = const()[name = tensor("op_2680_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2680_cast_fp16 = slice_by_index(begin = var_2680_begin_0, end = var_2680_end_0, end_mask = var_2680_end_mask_0, x = var_2642_cast_fp16)[name = tensor("op_2680_cast_fp16")]; tensor var_2681_begin_0 = const()[name = tensor("op_2681_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2681_end_0 = const()[name = tensor("op_2681_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2681_end_mask_0 = const()[name = tensor("op_2681_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2681_cast_fp16 = slice_by_index(begin = var_2681_begin_0, end = var_2681_end_0, end_mask = var_2681_end_mask_0, x = var_2642_cast_fp16)[name = tensor("op_2681_cast_fp16")]; tensor var_2682_begin_0 = const()[name = tensor("op_2682_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2682_end_0 = const()[name = tensor("op_2682_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2682_end_mask_0 = const()[name = tensor("op_2682_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2682_cast_fp16 = slice_by_index(begin = var_2682_begin_0, end = var_2682_end_0, end_mask = var_2682_end_mask_0, x = var_2642_cast_fp16)[name = tensor("op_2682_cast_fp16")]; tensor var_2683_begin_0 = const()[name = tensor("op_2683_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2683_end_0 = const()[name = tensor("op_2683_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2683_end_mask_0 = const()[name = tensor("op_2683_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2683_cast_fp16 = slice_by_index(begin = var_2683_begin_0, end = var_2683_end_0, end_mask = var_2683_end_mask_0, x = var_2646_cast_fp16)[name = tensor("op_2683_cast_fp16")]; tensor var_2684_begin_0 = const()[name = tensor("op_2684_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2684_end_0 = const()[name = tensor("op_2684_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2684_end_mask_0 = const()[name = tensor("op_2684_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2684_cast_fp16 = slice_by_index(begin = var_2684_begin_0, end = var_2684_end_0, end_mask = var_2684_end_mask_0, x = var_2646_cast_fp16)[name = tensor("op_2684_cast_fp16")]; tensor var_2685_begin_0 = const()[name = tensor("op_2685_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2685_end_0 = const()[name = tensor("op_2685_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2685_end_mask_0 = const()[name = tensor("op_2685_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2685_cast_fp16 = slice_by_index(begin = var_2685_begin_0, end = var_2685_end_0, end_mask = var_2685_end_mask_0, x = var_2646_cast_fp16)[name = tensor("op_2685_cast_fp16")]; tensor var_2686_begin_0 = const()[name = tensor("op_2686_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2686_end_0 = const()[name = tensor("op_2686_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2686_end_mask_0 = const()[name = tensor("op_2686_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2686_cast_fp16 = slice_by_index(begin = var_2686_begin_0, end = var_2686_end_0, end_mask = var_2686_end_mask_0, x = var_2646_cast_fp16)[name = tensor("op_2686_cast_fp16")]; tensor var_2687_begin_0 = const()[name = tensor("op_2687_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2687_end_0 = const()[name = tensor("op_2687_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2687_end_mask_0 = const()[name = tensor("op_2687_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2687_cast_fp16 = slice_by_index(begin = var_2687_begin_0, end = var_2687_end_0, end_mask = var_2687_end_mask_0, x = var_2646_cast_fp16)[name = tensor("op_2687_cast_fp16")]; tensor var_2688_begin_0 = const()[name = tensor("op_2688_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2688_end_0 = const()[name = tensor("op_2688_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2688_end_mask_0 = const()[name = tensor("op_2688_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2688_cast_fp16 = slice_by_index(begin = var_2688_begin_0, end = var_2688_end_0, end_mask = var_2688_end_mask_0, x = var_2646_cast_fp16)[name = tensor("op_2688_cast_fp16")]; tensor var_2689_begin_0 = const()[name = tensor("op_2689_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2689_end_0 = const()[name = tensor("op_2689_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2689_end_mask_0 = const()[name = tensor("op_2689_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2689_cast_fp16 = slice_by_index(begin = var_2689_begin_0, end = var_2689_end_0, end_mask = var_2689_end_mask_0, x = var_2650_cast_fp16)[name = tensor("op_2689_cast_fp16")]; tensor var_2690_begin_0 = const()[name = tensor("op_2690_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2690_end_0 = const()[name = tensor("op_2690_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2690_end_mask_0 = const()[name = tensor("op_2690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2690_cast_fp16 = slice_by_index(begin = var_2690_begin_0, end = var_2690_end_0, end_mask = var_2690_end_mask_0, x = var_2650_cast_fp16)[name = tensor("op_2690_cast_fp16")]; tensor var_2691_begin_0 = const()[name = tensor("op_2691_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2691_end_0 = const()[name = tensor("op_2691_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2691_end_mask_0 = const()[name = tensor("op_2691_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2691_cast_fp16 = slice_by_index(begin = var_2691_begin_0, end = var_2691_end_0, end_mask = var_2691_end_mask_0, x = var_2650_cast_fp16)[name = tensor("op_2691_cast_fp16")]; tensor var_2692_begin_0 = const()[name = tensor("op_2692_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2692_end_0 = const()[name = tensor("op_2692_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2692_end_mask_0 = const()[name = tensor("op_2692_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2692_cast_fp16 = slice_by_index(begin = var_2692_begin_0, end = var_2692_end_0, end_mask = var_2692_end_mask_0, x = var_2650_cast_fp16)[name = tensor("op_2692_cast_fp16")]; tensor var_2693_begin_0 = const()[name = tensor("op_2693_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2693_end_0 = const()[name = tensor("op_2693_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2693_end_mask_0 = const()[name = tensor("op_2693_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2693_cast_fp16 = slice_by_index(begin = var_2693_begin_0, end = var_2693_end_0, end_mask = var_2693_end_mask_0, x = var_2650_cast_fp16)[name = tensor("op_2693_cast_fp16")]; tensor var_2694_begin_0 = const()[name = tensor("op_2694_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2694_end_0 = const()[name = tensor("op_2694_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2694_end_mask_0 = const()[name = tensor("op_2694_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2694_cast_fp16 = slice_by_index(begin = var_2694_begin_0, end = var_2694_end_0, end_mask = var_2694_end_mask_0, x = var_2650_cast_fp16)[name = tensor("op_2694_cast_fp16")]; tensor var_2695_begin_0 = const()[name = tensor("op_2695_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2695_end_0 = const()[name = tensor("op_2695_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2695_end_mask_0 = const()[name = tensor("op_2695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2695_cast_fp16 = slice_by_index(begin = var_2695_begin_0, end = var_2695_end_0, end_mask = var_2695_end_mask_0, x = var_2654_cast_fp16)[name = tensor("op_2695_cast_fp16")]; tensor var_2696_begin_0 = const()[name = tensor("op_2696_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2696_end_0 = const()[name = tensor("op_2696_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2696_end_mask_0 = const()[name = tensor("op_2696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2696_cast_fp16 = slice_by_index(begin = var_2696_begin_0, end = var_2696_end_0, end_mask = var_2696_end_mask_0, x = var_2654_cast_fp16)[name = tensor("op_2696_cast_fp16")]; tensor var_2697_begin_0 = const()[name = tensor("op_2697_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2697_end_0 = const()[name = tensor("op_2697_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2697_end_mask_0 = const()[name = tensor("op_2697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2697_cast_fp16 = slice_by_index(begin = var_2697_begin_0, end = var_2697_end_0, end_mask = var_2697_end_mask_0, x = var_2654_cast_fp16)[name = tensor("op_2697_cast_fp16")]; tensor var_2698_begin_0 = const()[name = tensor("op_2698_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2698_end_0 = const()[name = tensor("op_2698_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2698_end_mask_0 = const()[name = tensor("op_2698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2698_cast_fp16 = slice_by_index(begin = var_2698_begin_0, end = var_2698_end_0, end_mask = var_2698_end_mask_0, x = var_2654_cast_fp16)[name = tensor("op_2698_cast_fp16")]; tensor var_2699_begin_0 = const()[name = tensor("op_2699_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2699_end_0 = const()[name = tensor("op_2699_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2699_end_mask_0 = const()[name = tensor("op_2699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2699_cast_fp16 = slice_by_index(begin = var_2699_begin_0, end = var_2699_end_0, end_mask = var_2699_end_mask_0, x = var_2654_cast_fp16)[name = tensor("op_2699_cast_fp16")]; tensor var_2700_begin_0 = const()[name = tensor("op_2700_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2700_end_0 = const()[name = tensor("op_2700_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2700_end_mask_0 = const()[name = tensor("op_2700_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2700_cast_fp16 = slice_by_index(begin = var_2700_begin_0, end = var_2700_end_0, end_mask = var_2700_end_mask_0, x = var_2654_cast_fp16)[name = tensor("op_2700_cast_fp16")]; tensor var_2701_begin_0 = const()[name = tensor("op_2701_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2701_end_0 = const()[name = tensor("op_2701_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2701_end_mask_0 = const()[name = tensor("op_2701_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2701_cast_fp16 = slice_by_index(begin = var_2701_begin_0, end = var_2701_end_0, end_mask = var_2701_end_mask_0, x = var_2658_cast_fp16)[name = tensor("op_2701_cast_fp16")]; tensor var_2702_begin_0 = const()[name = tensor("op_2702_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2702_end_0 = const()[name = tensor("op_2702_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2702_end_mask_0 = const()[name = tensor("op_2702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2702_cast_fp16 = slice_by_index(begin = var_2702_begin_0, end = var_2702_end_0, end_mask = var_2702_end_mask_0, x = var_2658_cast_fp16)[name = tensor("op_2702_cast_fp16")]; tensor var_2703_begin_0 = const()[name = tensor("op_2703_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2703_end_0 = const()[name = tensor("op_2703_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2703_end_mask_0 = const()[name = tensor("op_2703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2703_cast_fp16 = slice_by_index(begin = var_2703_begin_0, end = var_2703_end_0, end_mask = var_2703_end_mask_0, x = var_2658_cast_fp16)[name = tensor("op_2703_cast_fp16")]; tensor var_2704_begin_0 = const()[name = tensor("op_2704_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2704_end_0 = const()[name = tensor("op_2704_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2704_end_mask_0 = const()[name = tensor("op_2704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2704_cast_fp16 = slice_by_index(begin = var_2704_begin_0, end = var_2704_end_0, end_mask = var_2704_end_mask_0, x = var_2658_cast_fp16)[name = tensor("op_2704_cast_fp16")]; tensor var_2705_begin_0 = const()[name = tensor("op_2705_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2705_end_0 = const()[name = tensor("op_2705_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2705_end_mask_0 = const()[name = tensor("op_2705_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2705_cast_fp16 = slice_by_index(begin = var_2705_begin_0, end = var_2705_end_0, end_mask = var_2705_end_mask_0, x = var_2658_cast_fp16)[name = tensor("op_2705_cast_fp16")]; tensor var_2706_begin_0 = const()[name = tensor("op_2706_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2706_end_0 = const()[name = tensor("op_2706_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2706_end_mask_0 = const()[name = tensor("op_2706_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2706_cast_fp16 = slice_by_index(begin = var_2706_begin_0, end = var_2706_end_0, end_mask = var_2706_end_mask_0, x = var_2658_cast_fp16)[name = tensor("op_2706_cast_fp16")]; tensor var_2707_begin_0 = const()[name = tensor("op_2707_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2707_end_0 = const()[name = tensor("op_2707_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2707_end_mask_0 = const()[name = tensor("op_2707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2707_cast_fp16 = slice_by_index(begin = var_2707_begin_0, end = var_2707_end_0, end_mask = var_2707_end_mask_0, x = var_2662_cast_fp16)[name = tensor("op_2707_cast_fp16")]; tensor var_2708_begin_0 = const()[name = tensor("op_2708_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2708_end_0 = const()[name = tensor("op_2708_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2708_end_mask_0 = const()[name = tensor("op_2708_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2708_cast_fp16 = slice_by_index(begin = var_2708_begin_0, end = var_2708_end_0, end_mask = var_2708_end_mask_0, x = var_2662_cast_fp16)[name = tensor("op_2708_cast_fp16")]; tensor var_2709_begin_0 = const()[name = tensor("op_2709_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2709_end_0 = const()[name = tensor("op_2709_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2709_end_mask_0 = const()[name = tensor("op_2709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2709_cast_fp16 = slice_by_index(begin = var_2709_begin_0, end = var_2709_end_0, end_mask = var_2709_end_mask_0, x = var_2662_cast_fp16)[name = tensor("op_2709_cast_fp16")]; tensor var_2710_begin_0 = const()[name = tensor("op_2710_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2710_end_0 = const()[name = tensor("op_2710_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2710_end_mask_0 = const()[name = tensor("op_2710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2710_cast_fp16 = slice_by_index(begin = var_2710_begin_0, end = var_2710_end_0, end_mask = var_2710_end_mask_0, x = var_2662_cast_fp16)[name = tensor("op_2710_cast_fp16")]; tensor var_2711_begin_0 = const()[name = tensor("op_2711_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2711_end_0 = const()[name = tensor("op_2711_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2711_end_mask_0 = const()[name = tensor("op_2711_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2711_cast_fp16 = slice_by_index(begin = var_2711_begin_0, end = var_2711_end_0, end_mask = var_2711_end_mask_0, x = var_2662_cast_fp16)[name = tensor("op_2711_cast_fp16")]; tensor var_2712_begin_0 = const()[name = tensor("op_2712_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2712_end_0 = const()[name = tensor("op_2712_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2712_end_mask_0 = const()[name = tensor("op_2712_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2712_cast_fp16 = slice_by_index(begin = var_2712_begin_0, end = var_2712_end_0, end_mask = var_2712_end_mask_0, x = var_2662_cast_fp16)[name = tensor("op_2712_cast_fp16")]; tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_2717_begin_0 = const()[name = tensor("op_2717_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2717_end_0 = const()[name = tensor("op_2717_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_2717_end_mask_0 = const()[name = tensor("op_2717_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor("transpose_1")]; tensor var_2717_cast_fp16 = slice_by_index(begin = var_2717_begin_0, end = var_2717_end_0, end_mask = var_2717_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2717_cast_fp16")]; tensor var_2721_begin_0 = const()[name = tensor("op_2721_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_2721_end_0 = const()[name = tensor("op_2721_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_2721_end_mask_0 = const()[name = tensor("op_2721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2721_cast_fp16 = slice_by_index(begin = var_2721_begin_0, end = var_2721_end_0, end_mask = var_2721_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2721_cast_fp16")]; tensor var_2725_begin_0 = const()[name = tensor("op_2725_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_2725_end_0 = const()[name = tensor("op_2725_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_2725_end_mask_0 = const()[name = tensor("op_2725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2725_cast_fp16 = slice_by_index(begin = var_2725_begin_0, end = var_2725_end_0, end_mask = var_2725_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2725_cast_fp16")]; tensor var_2729_begin_0 = const()[name = tensor("op_2729_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_2729_end_0 = const()[name = tensor("op_2729_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_2729_end_mask_0 = const()[name = tensor("op_2729_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2729_cast_fp16 = slice_by_index(begin = var_2729_begin_0, end = var_2729_end_0, end_mask = var_2729_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2729_cast_fp16")]; tensor var_2733_begin_0 = const()[name = tensor("op_2733_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2733_end_0 = const()[name = tensor("op_2733_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_2733_end_mask_0 = const()[name = tensor("op_2733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2733_cast_fp16 = slice_by_index(begin = var_2733_begin_0, end = var_2733_end_0, end_mask = var_2733_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2733_cast_fp16")]; tensor var_2737_begin_0 = const()[name = tensor("op_2737_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_2737_end_0 = const()[name = tensor("op_2737_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_2737_end_mask_0 = const()[name = tensor("op_2737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2737_cast_fp16 = slice_by_index(begin = var_2737_begin_0, end = var_2737_end_0, end_mask = var_2737_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2737_cast_fp16")]; tensor var_2741_begin_0 = const()[name = tensor("op_2741_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_2741_end_0 = const()[name = tensor("op_2741_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_2741_end_mask_0 = const()[name = tensor("op_2741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2741_cast_fp16 = slice_by_index(begin = var_2741_begin_0, end = var_2741_end_0, end_mask = var_2741_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2741_cast_fp16")]; tensor var_2745_begin_0 = const()[name = tensor("op_2745_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_2745_end_0 = const()[name = tensor("op_2745_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_2745_end_mask_0 = const()[name = tensor("op_2745_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2745_cast_fp16 = slice_by_index(begin = var_2745_begin_0, end = var_2745_end_0, end_mask = var_2745_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_2745_cast_fp16")]; tensor var_2747_begin_0 = const()[name = tensor("op_2747_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2747_end_0 = const()[name = tensor("op_2747_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2747_end_mask_0 = const()[name = tensor("op_2747_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2747_cast_fp16 = slice_by_index(begin = var_2747_begin_0, end = var_2747_end_0, end_mask = var_2747_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2747_cast_fp16")]; tensor var_2751_begin_0 = const()[name = tensor("op_2751_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2751_end_0 = const()[name = tensor("op_2751_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2751_end_mask_0 = const()[name = tensor("op_2751_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2751_cast_fp16 = slice_by_index(begin = var_2751_begin_0, end = var_2751_end_0, end_mask = var_2751_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2751_cast_fp16")]; tensor var_2755_begin_0 = const()[name = tensor("op_2755_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2755_end_0 = const()[name = tensor("op_2755_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2755_end_mask_0 = const()[name = tensor("op_2755_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2755_cast_fp16 = slice_by_index(begin = var_2755_begin_0, end = var_2755_end_0, end_mask = var_2755_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2755_cast_fp16")]; tensor var_2759_begin_0 = const()[name = tensor("op_2759_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2759_end_0 = const()[name = tensor("op_2759_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2759_end_mask_0 = const()[name = tensor("op_2759_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2759_cast_fp16 = slice_by_index(begin = var_2759_begin_0, end = var_2759_end_0, end_mask = var_2759_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2759_cast_fp16")]; tensor var_2763_begin_0 = const()[name = tensor("op_2763_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2763_end_0 = const()[name = tensor("op_2763_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2763_end_mask_0 = const()[name = tensor("op_2763_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2763_cast_fp16 = slice_by_index(begin = var_2763_begin_0, end = var_2763_end_0, end_mask = var_2763_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2763_cast_fp16")]; tensor var_2767_begin_0 = const()[name = tensor("op_2767_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2767_end_0 = const()[name = tensor("op_2767_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2767_end_mask_0 = const()[name = tensor("op_2767_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2767_cast_fp16 = slice_by_index(begin = var_2767_begin_0, end = var_2767_end_0, end_mask = var_2767_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2767_cast_fp16")]; tensor var_2771_begin_0 = const()[name = tensor("op_2771_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2771_end_0 = const()[name = tensor("op_2771_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2771_end_mask_0 = const()[name = tensor("op_2771_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2771_cast_fp16 = slice_by_index(begin = var_2771_begin_0, end = var_2771_end_0, end_mask = var_2771_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2771_cast_fp16")]; tensor var_2775_begin_0 = const()[name = tensor("op_2775_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2775_end_0 = const()[name = tensor("op_2775_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_2775_end_mask_0 = const()[name = tensor("op_2775_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2775_cast_fp16 = slice_by_index(begin = var_2775_begin_0, end = var_2775_end_0, end_mask = var_2775_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_2775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_385_equation_0, values = (var_2717_cast_fp16, var_2665_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_387_equation_0, values = (var_2717_cast_fp16, var_2666_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_389_equation_0, values = (var_2717_cast_fp16, var_2667_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_391_equation_0, values = (var_2717_cast_fp16, var_2668_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_393_equation_0, values = (var_2717_cast_fp16, var_2669_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_395_equation_0, values = (var_2717_cast_fp16, var_2670_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_397_equation_0, values = (var_2721_cast_fp16, var_2671_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_399_equation_0, values = (var_2721_cast_fp16, var_2672_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_401_equation_0, values = (var_2721_cast_fp16, var_2673_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_403_equation_0, values = (var_2721_cast_fp16, var_2674_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_405_equation_0, values = (var_2721_cast_fp16, var_2675_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_407_equation_0, values = (var_2721_cast_fp16, var_2676_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_409_equation_0, values = (var_2725_cast_fp16, var_2677_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_411_equation_0, values = (var_2725_cast_fp16, var_2678_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_413_equation_0, values = (var_2725_cast_fp16, var_2679_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_415_equation_0, values = (var_2725_cast_fp16, var_2680_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_417_equation_0, values = (var_2725_cast_fp16, var_2681_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_419_equation_0, values = (var_2725_cast_fp16, var_2682_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_421_equation_0, values = (var_2729_cast_fp16, var_2683_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_423_equation_0, values = (var_2729_cast_fp16, var_2684_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_425_equation_0, values = (var_2729_cast_fp16, var_2685_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_427_equation_0, values = (var_2729_cast_fp16, var_2686_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_429_equation_0, values = (var_2729_cast_fp16, var_2687_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_431_equation_0, values = (var_2729_cast_fp16, var_2688_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_433_equation_0, values = (var_2733_cast_fp16, var_2689_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_435_equation_0, values = (var_2733_cast_fp16, var_2690_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_437_equation_0, values = (var_2733_cast_fp16, var_2691_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_439_equation_0, values = (var_2733_cast_fp16, var_2692_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_439_cast_fp16")]; tensor _SplitHeadsQ__mh_w_441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_441_equation_0, values = (var_2733_cast_fp16, var_2693_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_443_equation_0, values = (var_2733_cast_fp16, var_2694_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_445_equation_0, values = (var_2737_cast_fp16, var_2695_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_447_equation_0, values = (var_2737_cast_fp16, var_2696_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_449_equation_0, values = (var_2737_cast_fp16, var_2697_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_451_equation_0, values = (var_2737_cast_fp16, var_2698_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_453_equation_0, values = (var_2737_cast_fp16, var_2699_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_455_equation_0, values = (var_2737_cast_fp16, var_2700_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_457_equation_0, values = (var_2741_cast_fp16, var_2701_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_459_equation_0, values = (var_2741_cast_fp16, var_2702_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_461_equation_0, values = (var_2741_cast_fp16, var_2703_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_463_equation_0, values = (var_2741_cast_fp16, var_2704_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_465_equation_0, values = (var_2741_cast_fp16, var_2705_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_467_equation_0, values = (var_2741_cast_fp16, var_2706_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_469_equation_0, values = (var_2745_cast_fp16, var_2707_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_471_equation_0, values = (var_2745_cast_fp16, var_2708_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_473_equation_0, values = (var_2745_cast_fp16, var_2709_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_475_equation_0, values = (var_2745_cast_fp16, var_2710_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_477_equation_0, values = (var_2745_cast_fp16, var_2711_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_479_equation_0, values = (var_2745_cast_fp16, var_2712_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_479_cast_fp16")]; tensor var_2874_to_fp16 = const()[name = tensor("op_2874_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_385_cast_fp16, y = var_2874_to_fp16)[name = tensor("aw_chunk_385_cast_fp16")]; tensor var_2876_to_fp16 = const()[name = tensor("op_2876_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_387_cast_fp16, y = var_2876_to_fp16)[name = tensor("aw_chunk_387_cast_fp16")]; tensor var_2878_to_fp16 = const()[name = tensor("op_2878_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_389_cast_fp16, y = var_2878_to_fp16)[name = tensor("aw_chunk_389_cast_fp16")]; tensor var_2880_to_fp16 = const()[name = tensor("op_2880_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_391_cast_fp16, y = var_2880_to_fp16)[name = tensor("aw_chunk_391_cast_fp16")]; tensor var_2882_to_fp16 = const()[name = tensor("op_2882_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_393_cast_fp16, y = var_2882_to_fp16)[name = tensor("aw_chunk_393_cast_fp16")]; tensor var_2884_to_fp16 = const()[name = tensor("op_2884_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_395_cast_fp16, y = var_2884_to_fp16)[name = tensor("aw_chunk_395_cast_fp16")]; tensor var_2886_to_fp16 = const()[name = tensor("op_2886_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_397_cast_fp16, y = var_2886_to_fp16)[name = tensor("aw_chunk_397_cast_fp16")]; tensor var_2888_to_fp16 = const()[name = tensor("op_2888_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_399_cast_fp16, y = var_2888_to_fp16)[name = tensor("aw_chunk_399_cast_fp16")]; tensor var_2890_to_fp16 = const()[name = tensor("op_2890_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_401_cast_fp16, y = var_2890_to_fp16)[name = tensor("aw_chunk_401_cast_fp16")]; tensor var_2892_to_fp16 = const()[name = tensor("op_2892_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_403_cast_fp16, y = var_2892_to_fp16)[name = tensor("aw_chunk_403_cast_fp16")]; tensor var_2894_to_fp16 = const()[name = tensor("op_2894_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_405_cast_fp16, y = var_2894_to_fp16)[name = tensor("aw_chunk_405_cast_fp16")]; tensor var_2896_to_fp16 = const()[name = tensor("op_2896_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_407_cast_fp16, y = var_2896_to_fp16)[name = tensor("aw_chunk_407_cast_fp16")]; tensor var_2898_to_fp16 = const()[name = tensor("op_2898_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_409_cast_fp16, y = var_2898_to_fp16)[name = tensor("aw_chunk_409_cast_fp16")]; tensor var_2900_to_fp16 = const()[name = tensor("op_2900_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_411_cast_fp16, y = var_2900_to_fp16)[name = tensor("aw_chunk_411_cast_fp16")]; tensor var_2902_to_fp16 = const()[name = tensor("op_2902_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_413_cast_fp16, y = var_2902_to_fp16)[name = tensor("aw_chunk_413_cast_fp16")]; tensor var_2904_to_fp16 = const()[name = tensor("op_2904_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_415_cast_fp16, y = var_2904_to_fp16)[name = tensor("aw_chunk_415_cast_fp16")]; tensor var_2906_to_fp16 = const()[name = tensor("op_2906_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_417_cast_fp16, y = var_2906_to_fp16)[name = tensor("aw_chunk_417_cast_fp16")]; tensor var_2908_to_fp16 = const()[name = tensor("op_2908_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_419_cast_fp16, y = var_2908_to_fp16)[name = tensor("aw_chunk_419_cast_fp16")]; tensor var_2910_to_fp16 = const()[name = tensor("op_2910_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_421_cast_fp16, y = var_2910_to_fp16)[name = tensor("aw_chunk_421_cast_fp16")]; tensor var_2912_to_fp16 = const()[name = tensor("op_2912_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_423_cast_fp16, y = var_2912_to_fp16)[name = tensor("aw_chunk_423_cast_fp16")]; tensor var_2914_to_fp16 = const()[name = tensor("op_2914_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_425_cast_fp16, y = var_2914_to_fp16)[name = tensor("aw_chunk_425_cast_fp16")]; tensor var_2916_to_fp16 = const()[name = tensor("op_2916_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_427_cast_fp16, y = var_2916_to_fp16)[name = tensor("aw_chunk_427_cast_fp16")]; tensor var_2918_to_fp16 = const()[name = tensor("op_2918_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_429_cast_fp16, y = var_2918_to_fp16)[name = tensor("aw_chunk_429_cast_fp16")]; tensor var_2920_to_fp16 = const()[name = tensor("op_2920_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_431_cast_fp16, y = var_2920_to_fp16)[name = tensor("aw_chunk_431_cast_fp16")]; tensor var_2922_to_fp16 = const()[name = tensor("op_2922_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_433_cast_fp16, y = var_2922_to_fp16)[name = tensor("aw_chunk_433_cast_fp16")]; tensor var_2924_to_fp16 = const()[name = tensor("op_2924_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_435_cast_fp16, y = var_2924_to_fp16)[name = tensor("aw_chunk_435_cast_fp16")]; tensor var_2926_to_fp16 = const()[name = tensor("op_2926_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_437_cast_fp16, y = var_2926_to_fp16)[name = tensor("aw_chunk_437_cast_fp16")]; tensor var_2928_to_fp16 = const()[name = tensor("op_2928_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_439_cast_fp16, y = var_2928_to_fp16)[name = tensor("aw_chunk_439_cast_fp16")]; tensor var_2930_to_fp16 = const()[name = tensor("op_2930_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_441_cast_fp16, y = var_2930_to_fp16)[name = tensor("aw_chunk_441_cast_fp16")]; tensor var_2932_to_fp16 = const()[name = tensor("op_2932_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_443_cast_fp16, y = var_2932_to_fp16)[name = tensor("aw_chunk_443_cast_fp16")]; tensor var_2934_to_fp16 = const()[name = tensor("op_2934_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_445_cast_fp16, y = var_2934_to_fp16)[name = tensor("aw_chunk_445_cast_fp16")]; tensor var_2936_to_fp16 = const()[name = tensor("op_2936_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_447_cast_fp16, y = var_2936_to_fp16)[name = tensor("aw_chunk_447_cast_fp16")]; tensor var_2938_to_fp16 = const()[name = tensor("op_2938_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_449_cast_fp16, y = var_2938_to_fp16)[name = tensor("aw_chunk_449_cast_fp16")]; tensor var_2940_to_fp16 = const()[name = tensor("op_2940_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_451_cast_fp16, y = var_2940_to_fp16)[name = tensor("aw_chunk_451_cast_fp16")]; tensor var_2942_to_fp16 = const()[name = tensor("op_2942_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_453_cast_fp16, y = var_2942_to_fp16)[name = tensor("aw_chunk_453_cast_fp16")]; tensor var_2944_to_fp16 = const()[name = tensor("op_2944_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_455_cast_fp16, y = var_2944_to_fp16)[name = tensor("aw_chunk_455_cast_fp16")]; tensor var_2946_to_fp16 = const()[name = tensor("op_2946_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_457_cast_fp16, y = var_2946_to_fp16)[name = tensor("aw_chunk_457_cast_fp16")]; tensor var_2948_to_fp16 = const()[name = tensor("op_2948_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_459_cast_fp16, y = var_2948_to_fp16)[name = tensor("aw_chunk_459_cast_fp16")]; tensor var_2950_to_fp16 = const()[name = tensor("op_2950_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_461_cast_fp16, y = var_2950_to_fp16)[name = tensor("aw_chunk_461_cast_fp16")]; tensor var_2952_to_fp16 = const()[name = tensor("op_2952_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_463_cast_fp16, y = var_2952_to_fp16)[name = tensor("aw_chunk_463_cast_fp16")]; tensor var_2954_to_fp16 = const()[name = tensor("op_2954_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_465_cast_fp16, y = var_2954_to_fp16)[name = tensor("aw_chunk_465_cast_fp16")]; tensor var_2956_to_fp16 = const()[name = tensor("op_2956_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_467_cast_fp16, y = var_2956_to_fp16)[name = tensor("aw_chunk_467_cast_fp16")]; tensor var_2958_to_fp16 = const()[name = tensor("op_2958_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_469_cast_fp16, y = var_2958_to_fp16)[name = tensor("aw_chunk_469_cast_fp16")]; tensor var_2960_to_fp16 = const()[name = tensor("op_2960_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_471_cast_fp16, y = var_2960_to_fp16)[name = tensor("aw_chunk_471_cast_fp16")]; tensor var_2962_to_fp16 = const()[name = tensor("op_2962_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_473_cast_fp16, y = var_2962_to_fp16)[name = tensor("aw_chunk_473_cast_fp16")]; tensor var_2964_to_fp16 = const()[name = tensor("op_2964_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_475_cast_fp16, y = var_2964_to_fp16)[name = tensor("aw_chunk_475_cast_fp16")]; tensor var_2966_to_fp16 = const()[name = tensor("op_2966_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_477_cast_fp16, y = var_2966_to_fp16)[name = tensor("aw_chunk_477_cast_fp16")]; tensor var_2968_to_fp16 = const()[name = tensor("op_2968_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_479_cast_fp16, y = var_2968_to_fp16)[name = tensor("aw_chunk_479_cast_fp16")]; tensor var_2970_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_385_cast_fp16)[name = tensor("op_2970_cast_fp16")]; tensor var_2971_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_387_cast_fp16)[name = tensor("op_2971_cast_fp16")]; tensor var_2972_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_389_cast_fp16)[name = tensor("op_2972_cast_fp16")]; tensor var_2973_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_391_cast_fp16)[name = tensor("op_2973_cast_fp16")]; tensor var_2974_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_393_cast_fp16)[name = tensor("op_2974_cast_fp16")]; tensor var_2975_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_395_cast_fp16)[name = tensor("op_2975_cast_fp16")]; tensor var_2976_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_397_cast_fp16)[name = tensor("op_2976_cast_fp16")]; tensor var_2977_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_399_cast_fp16)[name = tensor("op_2977_cast_fp16")]; tensor var_2978_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_401_cast_fp16)[name = tensor("op_2978_cast_fp16")]; tensor var_2979_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_403_cast_fp16)[name = tensor("op_2979_cast_fp16")]; tensor var_2980_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_405_cast_fp16)[name = tensor("op_2980_cast_fp16")]; tensor var_2981_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_407_cast_fp16)[name = tensor("op_2981_cast_fp16")]; tensor var_2982_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_409_cast_fp16)[name = tensor("op_2982_cast_fp16")]; tensor var_2983_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_411_cast_fp16)[name = tensor("op_2983_cast_fp16")]; tensor var_2984_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_413_cast_fp16)[name = tensor("op_2984_cast_fp16")]; tensor var_2985_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_415_cast_fp16)[name = tensor("op_2985_cast_fp16")]; tensor var_2986_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_417_cast_fp16)[name = tensor("op_2986_cast_fp16")]; tensor var_2987_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_419_cast_fp16)[name = tensor("op_2987_cast_fp16")]; tensor var_2988_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_421_cast_fp16)[name = tensor("op_2988_cast_fp16")]; tensor var_2989_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_423_cast_fp16)[name = tensor("op_2989_cast_fp16")]; tensor var_2990_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_425_cast_fp16)[name = tensor("op_2990_cast_fp16")]; tensor var_2991_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_427_cast_fp16)[name = tensor("op_2991_cast_fp16")]; tensor var_2992_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_429_cast_fp16)[name = tensor("op_2992_cast_fp16")]; tensor var_2993_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_431_cast_fp16)[name = tensor("op_2993_cast_fp16")]; tensor var_2994_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_433_cast_fp16)[name = tensor("op_2994_cast_fp16")]; tensor var_2995_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_435_cast_fp16)[name = tensor("op_2995_cast_fp16")]; tensor var_2996_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_437_cast_fp16)[name = tensor("op_2996_cast_fp16")]; tensor var_2997_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_439_cast_fp16)[name = tensor("op_2997_cast_fp16")]; tensor var_2998_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_441_cast_fp16)[name = tensor("op_2998_cast_fp16")]; tensor var_2999_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_443_cast_fp16)[name = tensor("op_2999_cast_fp16")]; tensor var_3000_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_445_cast_fp16)[name = tensor("op_3000_cast_fp16")]; tensor var_3001_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_447_cast_fp16)[name = tensor("op_3001_cast_fp16")]; tensor var_3002_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_449_cast_fp16)[name = tensor("op_3002_cast_fp16")]; tensor var_3003_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_451_cast_fp16)[name = tensor("op_3003_cast_fp16")]; tensor var_3004_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_453_cast_fp16)[name = tensor("op_3004_cast_fp16")]; tensor var_3005_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_455_cast_fp16)[name = tensor("op_3005_cast_fp16")]; tensor var_3006_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_457_cast_fp16)[name = tensor("op_3006_cast_fp16")]; tensor var_3007_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_459_cast_fp16)[name = tensor("op_3007_cast_fp16")]; tensor var_3008_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_461_cast_fp16)[name = tensor("op_3008_cast_fp16")]; tensor var_3009_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_463_cast_fp16)[name = tensor("op_3009_cast_fp16")]; tensor var_3010_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_465_cast_fp16)[name = tensor("op_3010_cast_fp16")]; tensor var_3011_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_467_cast_fp16)[name = tensor("op_3011_cast_fp16")]; tensor var_3012_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_469_cast_fp16)[name = tensor("op_3012_cast_fp16")]; tensor var_3013_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_471_cast_fp16)[name = tensor("op_3013_cast_fp16")]; tensor var_3014_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_473_cast_fp16)[name = tensor("op_3014_cast_fp16")]; tensor var_3015_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_475_cast_fp16)[name = tensor("op_3015_cast_fp16")]; tensor var_3016_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_477_cast_fp16)[name = tensor("op_3016_cast_fp16")]; tensor var_3017_cast_fp16 = softmax(axis = var_2582, x = aw_chunk_479_cast_fp16)[name = tensor("op_3017_cast_fp16")]; tensor var_3019_equation_0 = const()[name = tensor("op_3019_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3019_cast_fp16 = einsum(equation = var_3019_equation_0, values = (var_2747_cast_fp16, var_2970_cast_fp16))[name = tensor("op_3019_cast_fp16")]; tensor var_3021_equation_0 = const()[name = tensor("op_3021_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3021_cast_fp16 = einsum(equation = var_3021_equation_0, values = (var_2747_cast_fp16, var_2971_cast_fp16))[name = tensor("op_3021_cast_fp16")]; tensor var_3023_equation_0 = const()[name = tensor("op_3023_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3023_cast_fp16 = einsum(equation = var_3023_equation_0, values = (var_2747_cast_fp16, var_2972_cast_fp16))[name = tensor("op_3023_cast_fp16")]; tensor var_3025_equation_0 = const()[name = tensor("op_3025_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3025_cast_fp16 = einsum(equation = var_3025_equation_0, values = (var_2747_cast_fp16, var_2973_cast_fp16))[name = tensor("op_3025_cast_fp16")]; tensor var_3027_equation_0 = const()[name = tensor("op_3027_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3027_cast_fp16 = einsum(equation = var_3027_equation_0, values = (var_2747_cast_fp16, var_2974_cast_fp16))[name = tensor("op_3027_cast_fp16")]; tensor var_3029_equation_0 = const()[name = tensor("op_3029_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3029_cast_fp16 = einsum(equation = var_3029_equation_0, values = (var_2747_cast_fp16, var_2975_cast_fp16))[name = tensor("op_3029_cast_fp16")]; tensor var_3031_equation_0 = const()[name = tensor("op_3031_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3031_cast_fp16 = einsum(equation = var_3031_equation_0, values = (var_2751_cast_fp16, var_2976_cast_fp16))[name = tensor("op_3031_cast_fp16")]; tensor var_3033_equation_0 = const()[name = tensor("op_3033_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3033_cast_fp16 = einsum(equation = var_3033_equation_0, values = (var_2751_cast_fp16, var_2977_cast_fp16))[name = tensor("op_3033_cast_fp16")]; tensor var_3035_equation_0 = const()[name = tensor("op_3035_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3035_cast_fp16 = einsum(equation = var_3035_equation_0, values = (var_2751_cast_fp16, var_2978_cast_fp16))[name = tensor("op_3035_cast_fp16")]; tensor var_3037_equation_0 = const()[name = tensor("op_3037_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3037_cast_fp16 = einsum(equation = var_3037_equation_0, values = (var_2751_cast_fp16, var_2979_cast_fp16))[name = tensor("op_3037_cast_fp16")]; tensor var_3039_equation_0 = const()[name = tensor("op_3039_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3039_cast_fp16 = einsum(equation = var_3039_equation_0, values = (var_2751_cast_fp16, var_2980_cast_fp16))[name = tensor("op_3039_cast_fp16")]; tensor var_3041_equation_0 = const()[name = tensor("op_3041_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3041_cast_fp16 = einsum(equation = var_3041_equation_0, values = (var_2751_cast_fp16, var_2981_cast_fp16))[name = tensor("op_3041_cast_fp16")]; tensor var_3043_equation_0 = const()[name = tensor("op_3043_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3043_cast_fp16 = einsum(equation = var_3043_equation_0, values = (var_2755_cast_fp16, var_2982_cast_fp16))[name = tensor("op_3043_cast_fp16")]; tensor var_3045_equation_0 = const()[name = tensor("op_3045_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3045_cast_fp16 = einsum(equation = var_3045_equation_0, values = (var_2755_cast_fp16, var_2983_cast_fp16))[name = tensor("op_3045_cast_fp16")]; tensor var_3047_equation_0 = const()[name = tensor("op_3047_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3047_cast_fp16 = einsum(equation = var_3047_equation_0, values = (var_2755_cast_fp16, var_2984_cast_fp16))[name = tensor("op_3047_cast_fp16")]; tensor var_3049_equation_0 = const()[name = tensor("op_3049_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3049_cast_fp16 = einsum(equation = var_3049_equation_0, values = (var_2755_cast_fp16, var_2985_cast_fp16))[name = tensor("op_3049_cast_fp16")]; tensor var_3051_equation_0 = const()[name = tensor("op_3051_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3051_cast_fp16 = einsum(equation = var_3051_equation_0, values = (var_2755_cast_fp16, var_2986_cast_fp16))[name = tensor("op_3051_cast_fp16")]; tensor var_3053_equation_0 = const()[name = tensor("op_3053_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3053_cast_fp16 = einsum(equation = var_3053_equation_0, values = (var_2755_cast_fp16, var_2987_cast_fp16))[name = tensor("op_3053_cast_fp16")]; tensor var_3055_equation_0 = const()[name = tensor("op_3055_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3055_cast_fp16 = einsum(equation = var_3055_equation_0, values = (var_2759_cast_fp16, var_2988_cast_fp16))[name = tensor("op_3055_cast_fp16")]; tensor var_3057_equation_0 = const()[name = tensor("op_3057_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3057_cast_fp16 = einsum(equation = var_3057_equation_0, values = (var_2759_cast_fp16, var_2989_cast_fp16))[name = tensor("op_3057_cast_fp16")]; tensor var_3059_equation_0 = const()[name = tensor("op_3059_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3059_cast_fp16 = einsum(equation = var_3059_equation_0, values = (var_2759_cast_fp16, var_2990_cast_fp16))[name = tensor("op_3059_cast_fp16")]; tensor var_3061_equation_0 = const()[name = tensor("op_3061_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3061_cast_fp16 = einsum(equation = var_3061_equation_0, values = (var_2759_cast_fp16, var_2991_cast_fp16))[name = tensor("op_3061_cast_fp16")]; tensor var_3063_equation_0 = const()[name = tensor("op_3063_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3063_cast_fp16 = einsum(equation = var_3063_equation_0, values = (var_2759_cast_fp16, var_2992_cast_fp16))[name = tensor("op_3063_cast_fp16")]; tensor var_3065_equation_0 = const()[name = tensor("op_3065_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3065_cast_fp16 = einsum(equation = var_3065_equation_0, values = (var_2759_cast_fp16, var_2993_cast_fp16))[name = tensor("op_3065_cast_fp16")]; tensor var_3067_equation_0 = const()[name = tensor("op_3067_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3067_cast_fp16 = einsum(equation = var_3067_equation_0, values = (var_2763_cast_fp16, var_2994_cast_fp16))[name = tensor("op_3067_cast_fp16")]; tensor var_3069_equation_0 = const()[name = tensor("op_3069_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3069_cast_fp16 = einsum(equation = var_3069_equation_0, values = (var_2763_cast_fp16, var_2995_cast_fp16))[name = tensor("op_3069_cast_fp16")]; tensor var_3071_equation_0 = const()[name = tensor("op_3071_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3071_cast_fp16 = einsum(equation = var_3071_equation_0, values = (var_2763_cast_fp16, var_2996_cast_fp16))[name = tensor("op_3071_cast_fp16")]; tensor var_3073_equation_0 = const()[name = tensor("op_3073_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3073_cast_fp16 = einsum(equation = var_3073_equation_0, values = (var_2763_cast_fp16, var_2997_cast_fp16))[name = tensor("op_3073_cast_fp16")]; tensor var_3075_equation_0 = const()[name = tensor("op_3075_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3075_cast_fp16 = einsum(equation = var_3075_equation_0, values = (var_2763_cast_fp16, var_2998_cast_fp16))[name = tensor("op_3075_cast_fp16")]; tensor var_3077_equation_0 = const()[name = tensor("op_3077_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3077_cast_fp16 = einsum(equation = var_3077_equation_0, values = (var_2763_cast_fp16, var_2999_cast_fp16))[name = tensor("op_3077_cast_fp16")]; tensor var_3079_equation_0 = const()[name = tensor("op_3079_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3079_cast_fp16 = einsum(equation = var_3079_equation_0, values = (var_2767_cast_fp16, var_3000_cast_fp16))[name = tensor("op_3079_cast_fp16")]; tensor var_3081_equation_0 = const()[name = tensor("op_3081_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3081_cast_fp16 = einsum(equation = var_3081_equation_0, values = (var_2767_cast_fp16, var_3001_cast_fp16))[name = tensor("op_3081_cast_fp16")]; tensor var_3083_equation_0 = const()[name = tensor("op_3083_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3083_cast_fp16 = einsum(equation = var_3083_equation_0, values = (var_2767_cast_fp16, var_3002_cast_fp16))[name = tensor("op_3083_cast_fp16")]; tensor var_3085_equation_0 = const()[name = tensor("op_3085_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3085_cast_fp16 = einsum(equation = var_3085_equation_0, values = (var_2767_cast_fp16, var_3003_cast_fp16))[name = tensor("op_3085_cast_fp16")]; tensor var_3087_equation_0 = const()[name = tensor("op_3087_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3087_cast_fp16 = einsum(equation = var_3087_equation_0, values = (var_2767_cast_fp16, var_3004_cast_fp16))[name = tensor("op_3087_cast_fp16")]; tensor var_3089_equation_0 = const()[name = tensor("op_3089_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3089_cast_fp16 = einsum(equation = var_3089_equation_0, values = (var_2767_cast_fp16, var_3005_cast_fp16))[name = tensor("op_3089_cast_fp16")]; tensor var_3091_equation_0 = const()[name = tensor("op_3091_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3091_cast_fp16 = einsum(equation = var_3091_equation_0, values = (var_2771_cast_fp16, var_3006_cast_fp16))[name = tensor("op_3091_cast_fp16")]; tensor var_3093_equation_0 = const()[name = tensor("op_3093_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3093_cast_fp16 = einsum(equation = var_3093_equation_0, values = (var_2771_cast_fp16, var_3007_cast_fp16))[name = tensor("op_3093_cast_fp16")]; tensor var_3095_equation_0 = const()[name = tensor("op_3095_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3095_cast_fp16 = einsum(equation = var_3095_equation_0, values = (var_2771_cast_fp16, var_3008_cast_fp16))[name = tensor("op_3095_cast_fp16")]; tensor var_3097_equation_0 = const()[name = tensor("op_3097_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3097_cast_fp16 = einsum(equation = var_3097_equation_0, values = (var_2771_cast_fp16, var_3009_cast_fp16))[name = tensor("op_3097_cast_fp16")]; tensor var_3099_equation_0 = const()[name = tensor("op_3099_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3099_cast_fp16 = einsum(equation = var_3099_equation_0, values = (var_2771_cast_fp16, var_3010_cast_fp16))[name = tensor("op_3099_cast_fp16")]; tensor var_3101_equation_0 = const()[name = tensor("op_3101_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3101_cast_fp16 = einsum(equation = var_3101_equation_0, values = (var_2771_cast_fp16, var_3011_cast_fp16))[name = tensor("op_3101_cast_fp16")]; tensor var_3103_equation_0 = const()[name = tensor("op_3103_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3103_cast_fp16 = einsum(equation = var_3103_equation_0, values = (var_2775_cast_fp16, var_3012_cast_fp16))[name = tensor("op_3103_cast_fp16")]; tensor var_3105_equation_0 = const()[name = tensor("op_3105_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3105_cast_fp16 = einsum(equation = var_3105_equation_0, values = (var_2775_cast_fp16, var_3013_cast_fp16))[name = tensor("op_3105_cast_fp16")]; tensor var_3107_equation_0 = const()[name = tensor("op_3107_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3107_cast_fp16 = einsum(equation = var_3107_equation_0, values = (var_2775_cast_fp16, var_3014_cast_fp16))[name = tensor("op_3107_cast_fp16")]; tensor var_3109_equation_0 = const()[name = tensor("op_3109_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3109_cast_fp16 = einsum(equation = var_3109_equation_0, values = (var_2775_cast_fp16, var_3015_cast_fp16))[name = tensor("op_3109_cast_fp16")]; tensor var_3111_equation_0 = const()[name = tensor("op_3111_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3111_cast_fp16 = einsum(equation = var_3111_equation_0, values = (var_2775_cast_fp16, var_3016_cast_fp16))[name = tensor("op_3111_cast_fp16")]; tensor var_3113_equation_0 = const()[name = tensor("op_3113_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3113_cast_fp16 = einsum(equation = var_3113_equation_0, values = (var_2775_cast_fp16, var_3017_cast_fp16))[name = tensor("op_3113_cast_fp16")]; tensor var_3115_interleave_0 = const()[name = tensor("op_3115_interleave_0"), val = tensor(false)]; tensor var_3115_cast_fp16 = concat(axis = var_2569, interleave = var_3115_interleave_0, values = (var_3019_cast_fp16, var_3021_cast_fp16, var_3023_cast_fp16, var_3025_cast_fp16, var_3027_cast_fp16, var_3029_cast_fp16))[name = tensor("op_3115_cast_fp16")]; tensor var_3117_interleave_0 = const()[name = tensor("op_3117_interleave_0"), val = tensor(false)]; tensor var_3117_cast_fp16 = concat(axis = var_2569, interleave = var_3117_interleave_0, values = (var_3031_cast_fp16, var_3033_cast_fp16, var_3035_cast_fp16, var_3037_cast_fp16, var_3039_cast_fp16, var_3041_cast_fp16))[name = tensor("op_3117_cast_fp16")]; tensor var_3119_interleave_0 = const()[name = tensor("op_3119_interleave_0"), val = tensor(false)]; tensor var_3119_cast_fp16 = concat(axis = var_2569, interleave = var_3119_interleave_0, values = (var_3043_cast_fp16, var_3045_cast_fp16, var_3047_cast_fp16, var_3049_cast_fp16, var_3051_cast_fp16, var_3053_cast_fp16))[name = tensor("op_3119_cast_fp16")]; tensor var_3121_interleave_0 = const()[name = tensor("op_3121_interleave_0"), val = tensor(false)]; tensor var_3121_cast_fp16 = concat(axis = var_2569, interleave = var_3121_interleave_0, values = (var_3055_cast_fp16, var_3057_cast_fp16, var_3059_cast_fp16, var_3061_cast_fp16, var_3063_cast_fp16, var_3065_cast_fp16))[name = tensor("op_3121_cast_fp16")]; tensor var_3123_interleave_0 = const()[name = tensor("op_3123_interleave_0"), val = tensor(false)]; tensor var_3123_cast_fp16 = concat(axis = var_2569, interleave = var_3123_interleave_0, values = (var_3067_cast_fp16, var_3069_cast_fp16, var_3071_cast_fp16, var_3073_cast_fp16, var_3075_cast_fp16, var_3077_cast_fp16))[name = tensor("op_3123_cast_fp16")]; tensor var_3125_interleave_0 = const()[name = tensor("op_3125_interleave_0"), val = tensor(false)]; tensor var_3125_cast_fp16 = concat(axis = var_2569, interleave = var_3125_interleave_0, values = (var_3079_cast_fp16, var_3081_cast_fp16, var_3083_cast_fp16, var_3085_cast_fp16, var_3087_cast_fp16, var_3089_cast_fp16))[name = tensor("op_3125_cast_fp16")]; tensor var_3127_interleave_0 = const()[name = tensor("op_3127_interleave_0"), val = tensor(false)]; tensor var_3127_cast_fp16 = concat(axis = var_2569, interleave = var_3127_interleave_0, values = (var_3091_cast_fp16, var_3093_cast_fp16, var_3095_cast_fp16, var_3097_cast_fp16, var_3099_cast_fp16, var_3101_cast_fp16))[name = tensor("op_3127_cast_fp16")]; tensor var_3129_interleave_0 = const()[name = tensor("op_3129_interleave_0"), val = tensor(false)]; tensor var_3129_cast_fp16 = concat(axis = var_2569, interleave = var_3129_interleave_0, values = (var_3103_cast_fp16, var_3105_cast_fp16, var_3107_cast_fp16, var_3109_cast_fp16, var_3111_cast_fp16, var_3113_cast_fp16))[name = tensor("op_3129_cast_fp16")]; tensor input_33_interleave_0 = const()[name = tensor("input_33_interleave_0"), val = tensor(false)]; tensor input_33_cast_fp16 = concat(axis = var_2582, interleave = input_33_interleave_0, values = (var_3115_cast_fp16, var_3117_cast_fp16, var_3119_cast_fp16, var_3121_cast_fp16, var_3123_cast_fp16, var_3125_cast_fp16, var_3127_cast_fp16, var_3129_cast_fp16))[name = tensor("input_33_cast_fp16")]; tensor obj_19_pad_type_0 = const()[name = tensor("obj_19_pad_type_0"), val = tensor("valid")]; tensor obj_19_strides_0 = const()[name = tensor("obj_19_strides_0"), val = tensor([1, 1])]; tensor obj_19_pad_0 = const()[name = tensor("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_19_dilations_0 = const()[name = tensor("obj_19_dilations_0"), val = tensor([1, 1])]; tensor obj_19_groups_0 = const()[name = tensor("obj_19_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30155456)))]; tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30679808)))]; tensor obj_19_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_19_cast_fp16")]; tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; tensor var_3148_to_fp16 = const()[name = tensor("op_3148_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_3148_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30680896)))]; tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30681984)))]; tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("valid")]; tensor input_37_strides_0 = const()[name = tensor("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = tensor("input_37_dilations_0"), val = tensor([1, 1])]; tensor input_37_groups_0 = const()[name = tensor("input_37_groups_0"), val = tensor(1)]; tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30683072)))]; tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32780288)))]; tensor input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("valid")]; tensor hidden_states_13_strides_0 = const()[name = tensor("hidden_states_13_strides_0"), val = tensor([1, 1])]; tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_13_dilations_0 = const()[name = tensor("hidden_states_13_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_13_groups_0 = const()[name = tensor("hidden_states_13_groups_0"), val = tensor(1)]; tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32784448)))]; tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34881664)))]; tensor hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = hidden_states_13_dilations_0, groups = hidden_states_13_groups_0, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = hidden_states_13_strides_0, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; tensor var_3180 = const()[name = tensor("op_3180"), val = tensor(3)]; tensor var_3193 = const()[name = tensor("op_3193"), val = tensor(1)]; tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; tensor var_3210_to_fp16 = const()[name = tensor("op_3210_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_3210_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34882752)))]; tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34883840)))]; tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("valid")]; tensor query_strides_0 = const()[name = tensor("query_strides_0"), val = tensor([1, 1])]; tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_dilations_0 = const()[name = tensor("query_dilations_0"), val = tensor([1, 1])]; tensor query_groups_0 = const()[name = tensor("query_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34884928)))]; tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35409280)))]; tensor query_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("query_cast_fp16")]; tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("valid")]; tensor key_strides_0 = const()[name = tensor("key_strides_0"), val = tensor([1, 1])]; tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_dilations_0 = const()[name = tensor("key_dilations_0"), val = tensor([1, 1])]; tensor key_groups_0 = const()[name = tensor("key_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35410368)))]; tensor key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("key_cast_fp16")]; tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("valid")]; tensor value_strides_0 = const()[name = tensor("value_strides_0"), val = tensor([1, 1])]; tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_dilations_0 = const()[name = tensor("value_dilations_0"), val = tensor([1, 1])]; tensor value_groups_0 = const()[name = tensor("value_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35934720)))]; tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36459072)))]; tensor value_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("value_cast_fp16")]; tensor var_3245_begin_0 = const()[name = tensor("op_3245_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3245_end_0 = const()[name = tensor("op_3245_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3245_end_mask_0 = const()[name = tensor("op_3245_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3245_cast_fp16 = slice_by_index(begin = var_3245_begin_0, end = var_3245_end_0, end_mask = var_3245_end_mask_0, x = query_cast_fp16)[name = tensor("op_3245_cast_fp16")]; tensor var_3249_begin_0 = const()[name = tensor("op_3249_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_3249_end_0 = const()[name = tensor("op_3249_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_3249_end_mask_0 = const()[name = tensor("op_3249_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3249_cast_fp16 = slice_by_index(begin = var_3249_begin_0, end = var_3249_end_0, end_mask = var_3249_end_mask_0, x = query_cast_fp16)[name = tensor("op_3249_cast_fp16")]; tensor var_3253_begin_0 = const()[name = tensor("op_3253_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_3253_end_0 = const()[name = tensor("op_3253_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_3253_end_mask_0 = const()[name = tensor("op_3253_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3253_cast_fp16 = slice_by_index(begin = var_3253_begin_0, end = var_3253_end_0, end_mask = var_3253_end_mask_0, x = query_cast_fp16)[name = tensor("op_3253_cast_fp16")]; tensor var_3257_begin_0 = const()[name = tensor("op_3257_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_3257_end_0 = const()[name = tensor("op_3257_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_3257_end_mask_0 = const()[name = tensor("op_3257_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3257_cast_fp16 = slice_by_index(begin = var_3257_begin_0, end = var_3257_end_0, end_mask = var_3257_end_mask_0, x = query_cast_fp16)[name = tensor("op_3257_cast_fp16")]; tensor var_3261_begin_0 = const()[name = tensor("op_3261_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_3261_end_0 = const()[name = tensor("op_3261_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_3261_end_mask_0 = const()[name = tensor("op_3261_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3261_cast_fp16 = slice_by_index(begin = var_3261_begin_0, end = var_3261_end_0, end_mask = var_3261_end_mask_0, x = query_cast_fp16)[name = tensor("op_3261_cast_fp16")]; tensor var_3265_begin_0 = const()[name = tensor("op_3265_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3265_end_0 = const()[name = tensor("op_3265_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_3265_end_mask_0 = const()[name = tensor("op_3265_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3265_cast_fp16 = slice_by_index(begin = var_3265_begin_0, end = var_3265_end_0, end_mask = var_3265_end_mask_0, x = query_cast_fp16)[name = tensor("op_3265_cast_fp16")]; tensor var_3269_begin_0 = const()[name = tensor("op_3269_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_3269_end_0 = const()[name = tensor("op_3269_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_3269_end_mask_0 = const()[name = tensor("op_3269_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3269_cast_fp16 = slice_by_index(begin = var_3269_begin_0, end = var_3269_end_0, end_mask = var_3269_end_mask_0, x = query_cast_fp16)[name = tensor("op_3269_cast_fp16")]; tensor var_3273_begin_0 = const()[name = tensor("op_3273_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_3273_end_0 = const()[name = tensor("op_3273_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_3273_end_mask_0 = const()[name = tensor("op_3273_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3273_cast_fp16 = slice_by_index(begin = var_3273_begin_0, end = var_3273_end_0, end_mask = var_3273_end_mask_0, x = query_cast_fp16)[name = tensor("op_3273_cast_fp16")]; tensor var_3276_begin_0 = const()[name = tensor("op_3276_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3276_end_0 = const()[name = tensor("op_3276_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3276_end_mask_0 = const()[name = tensor("op_3276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3276_cast_fp16 = slice_by_index(begin = var_3276_begin_0, end = var_3276_end_0, end_mask = var_3276_end_mask_0, x = var_3245_cast_fp16)[name = tensor("op_3276_cast_fp16")]; tensor var_3277_begin_0 = const()[name = tensor("op_3277_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3277_end_0 = const()[name = tensor("op_3277_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3277_end_mask_0 = const()[name = tensor("op_3277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3277_cast_fp16 = slice_by_index(begin = var_3277_begin_0, end = var_3277_end_0, end_mask = var_3277_end_mask_0, x = var_3245_cast_fp16)[name = tensor("op_3277_cast_fp16")]; tensor var_3278_begin_0 = const()[name = tensor("op_3278_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3278_end_0 = const()[name = tensor("op_3278_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3278_end_mask_0 = const()[name = tensor("op_3278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3278_cast_fp16 = slice_by_index(begin = var_3278_begin_0, end = var_3278_end_0, end_mask = var_3278_end_mask_0, x = var_3245_cast_fp16)[name = tensor("op_3278_cast_fp16")]; tensor var_3279_begin_0 = const()[name = tensor("op_3279_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3279_end_0 = const()[name = tensor("op_3279_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3279_end_mask_0 = const()[name = tensor("op_3279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3279_cast_fp16 = slice_by_index(begin = var_3279_begin_0, end = var_3279_end_0, end_mask = var_3279_end_mask_0, x = var_3245_cast_fp16)[name = tensor("op_3279_cast_fp16")]; tensor var_3280_begin_0 = const()[name = tensor("op_3280_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3280_end_0 = const()[name = tensor("op_3280_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3280_end_mask_0 = const()[name = tensor("op_3280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3280_cast_fp16 = slice_by_index(begin = var_3280_begin_0, end = var_3280_end_0, end_mask = var_3280_end_mask_0, x = var_3245_cast_fp16)[name = tensor("op_3280_cast_fp16")]; tensor var_3281_begin_0 = const()[name = tensor("op_3281_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3281_end_0 = const()[name = tensor("op_3281_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3281_end_mask_0 = const()[name = tensor("op_3281_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3281_cast_fp16 = slice_by_index(begin = var_3281_begin_0, end = var_3281_end_0, end_mask = var_3281_end_mask_0, x = var_3245_cast_fp16)[name = tensor("op_3281_cast_fp16")]; tensor var_3282_begin_0 = const()[name = tensor("op_3282_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3282_end_0 = const()[name = tensor("op_3282_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3282_end_mask_0 = const()[name = tensor("op_3282_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3282_cast_fp16 = slice_by_index(begin = var_3282_begin_0, end = var_3282_end_0, end_mask = var_3282_end_mask_0, x = var_3249_cast_fp16)[name = tensor("op_3282_cast_fp16")]; tensor var_3283_begin_0 = const()[name = tensor("op_3283_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3283_end_0 = const()[name = tensor("op_3283_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3283_end_mask_0 = const()[name = tensor("op_3283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3283_cast_fp16 = slice_by_index(begin = var_3283_begin_0, end = var_3283_end_0, end_mask = var_3283_end_mask_0, x = var_3249_cast_fp16)[name = tensor("op_3283_cast_fp16")]; tensor var_3284_begin_0 = const()[name = tensor("op_3284_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3284_end_0 = const()[name = tensor("op_3284_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3284_end_mask_0 = const()[name = tensor("op_3284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3284_cast_fp16 = slice_by_index(begin = var_3284_begin_0, end = var_3284_end_0, end_mask = var_3284_end_mask_0, x = var_3249_cast_fp16)[name = tensor("op_3284_cast_fp16")]; tensor var_3285_begin_0 = const()[name = tensor("op_3285_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3285_end_0 = const()[name = tensor("op_3285_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3285_end_mask_0 = const()[name = tensor("op_3285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3285_cast_fp16 = slice_by_index(begin = var_3285_begin_0, end = var_3285_end_0, end_mask = var_3285_end_mask_0, x = var_3249_cast_fp16)[name = tensor("op_3285_cast_fp16")]; tensor var_3286_begin_0 = const()[name = tensor("op_3286_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3286_end_0 = const()[name = tensor("op_3286_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3286_end_mask_0 = const()[name = tensor("op_3286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3286_cast_fp16 = slice_by_index(begin = var_3286_begin_0, end = var_3286_end_0, end_mask = var_3286_end_mask_0, x = var_3249_cast_fp16)[name = tensor("op_3286_cast_fp16")]; tensor var_3287_begin_0 = const()[name = tensor("op_3287_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3287_end_0 = const()[name = tensor("op_3287_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3287_end_mask_0 = const()[name = tensor("op_3287_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3287_cast_fp16 = slice_by_index(begin = var_3287_begin_0, end = var_3287_end_0, end_mask = var_3287_end_mask_0, x = var_3249_cast_fp16)[name = tensor("op_3287_cast_fp16")]; tensor var_3288_begin_0 = const()[name = tensor("op_3288_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3288_end_0 = const()[name = tensor("op_3288_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3288_end_mask_0 = const()[name = tensor("op_3288_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3288_cast_fp16 = slice_by_index(begin = var_3288_begin_0, end = var_3288_end_0, end_mask = var_3288_end_mask_0, x = var_3253_cast_fp16)[name = tensor("op_3288_cast_fp16")]; tensor var_3289_begin_0 = const()[name = tensor("op_3289_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3289_end_0 = const()[name = tensor("op_3289_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3289_end_mask_0 = const()[name = tensor("op_3289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3289_cast_fp16 = slice_by_index(begin = var_3289_begin_0, end = var_3289_end_0, end_mask = var_3289_end_mask_0, x = var_3253_cast_fp16)[name = tensor("op_3289_cast_fp16")]; tensor var_3290_begin_0 = const()[name = tensor("op_3290_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3290_end_0 = const()[name = tensor("op_3290_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3290_end_mask_0 = const()[name = tensor("op_3290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3290_cast_fp16 = slice_by_index(begin = var_3290_begin_0, end = var_3290_end_0, end_mask = var_3290_end_mask_0, x = var_3253_cast_fp16)[name = tensor("op_3290_cast_fp16")]; tensor var_3291_begin_0 = const()[name = tensor("op_3291_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3291_end_0 = const()[name = tensor("op_3291_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3291_end_mask_0 = const()[name = tensor("op_3291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3291_cast_fp16 = slice_by_index(begin = var_3291_begin_0, end = var_3291_end_0, end_mask = var_3291_end_mask_0, x = var_3253_cast_fp16)[name = tensor("op_3291_cast_fp16")]; tensor var_3292_begin_0 = const()[name = tensor("op_3292_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3292_end_0 = const()[name = tensor("op_3292_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3292_end_mask_0 = const()[name = tensor("op_3292_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3292_cast_fp16 = slice_by_index(begin = var_3292_begin_0, end = var_3292_end_0, end_mask = var_3292_end_mask_0, x = var_3253_cast_fp16)[name = tensor("op_3292_cast_fp16")]; tensor var_3293_begin_0 = const()[name = tensor("op_3293_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3293_end_0 = const()[name = tensor("op_3293_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3293_end_mask_0 = const()[name = tensor("op_3293_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3293_cast_fp16 = slice_by_index(begin = var_3293_begin_0, end = var_3293_end_0, end_mask = var_3293_end_mask_0, x = var_3253_cast_fp16)[name = tensor("op_3293_cast_fp16")]; tensor var_3294_begin_0 = const()[name = tensor("op_3294_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3294_end_0 = const()[name = tensor("op_3294_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3294_end_mask_0 = const()[name = tensor("op_3294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3294_cast_fp16 = slice_by_index(begin = var_3294_begin_0, end = var_3294_end_0, end_mask = var_3294_end_mask_0, x = var_3257_cast_fp16)[name = tensor("op_3294_cast_fp16")]; tensor var_3295_begin_0 = const()[name = tensor("op_3295_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3295_end_0 = const()[name = tensor("op_3295_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3295_end_mask_0 = const()[name = tensor("op_3295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3295_cast_fp16 = slice_by_index(begin = var_3295_begin_0, end = var_3295_end_0, end_mask = var_3295_end_mask_0, x = var_3257_cast_fp16)[name = tensor("op_3295_cast_fp16")]; tensor var_3296_begin_0 = const()[name = tensor("op_3296_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3296_end_0 = const()[name = tensor("op_3296_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3296_end_mask_0 = const()[name = tensor("op_3296_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3296_cast_fp16 = slice_by_index(begin = var_3296_begin_0, end = var_3296_end_0, end_mask = var_3296_end_mask_0, x = var_3257_cast_fp16)[name = tensor("op_3296_cast_fp16")]; tensor var_3297_begin_0 = const()[name = tensor("op_3297_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3297_end_0 = const()[name = tensor("op_3297_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3297_end_mask_0 = const()[name = tensor("op_3297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3297_cast_fp16 = slice_by_index(begin = var_3297_begin_0, end = var_3297_end_0, end_mask = var_3297_end_mask_0, x = var_3257_cast_fp16)[name = tensor("op_3297_cast_fp16")]; tensor var_3298_begin_0 = const()[name = tensor("op_3298_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3298_end_0 = const()[name = tensor("op_3298_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3298_end_mask_0 = const()[name = tensor("op_3298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3298_cast_fp16 = slice_by_index(begin = var_3298_begin_0, end = var_3298_end_0, end_mask = var_3298_end_mask_0, x = var_3257_cast_fp16)[name = tensor("op_3298_cast_fp16")]; tensor var_3299_begin_0 = const()[name = tensor("op_3299_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3299_end_0 = const()[name = tensor("op_3299_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3299_end_mask_0 = const()[name = tensor("op_3299_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3299_cast_fp16 = slice_by_index(begin = var_3299_begin_0, end = var_3299_end_0, end_mask = var_3299_end_mask_0, x = var_3257_cast_fp16)[name = tensor("op_3299_cast_fp16")]; tensor var_3300_begin_0 = const()[name = tensor("op_3300_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3300_end_0 = const()[name = tensor("op_3300_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3300_end_mask_0 = const()[name = tensor("op_3300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3300_cast_fp16 = slice_by_index(begin = var_3300_begin_0, end = var_3300_end_0, end_mask = var_3300_end_mask_0, x = var_3261_cast_fp16)[name = tensor("op_3300_cast_fp16")]; tensor var_3301_begin_0 = const()[name = tensor("op_3301_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3301_end_0 = const()[name = tensor("op_3301_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3301_end_mask_0 = const()[name = tensor("op_3301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3301_cast_fp16 = slice_by_index(begin = var_3301_begin_0, end = var_3301_end_0, end_mask = var_3301_end_mask_0, x = var_3261_cast_fp16)[name = tensor("op_3301_cast_fp16")]; tensor var_3302_begin_0 = const()[name = tensor("op_3302_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3302_end_0 = const()[name = tensor("op_3302_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3302_end_mask_0 = const()[name = tensor("op_3302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3302_cast_fp16 = slice_by_index(begin = var_3302_begin_0, end = var_3302_end_0, end_mask = var_3302_end_mask_0, x = var_3261_cast_fp16)[name = tensor("op_3302_cast_fp16")]; tensor var_3303_begin_0 = const()[name = tensor("op_3303_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3303_end_0 = const()[name = tensor("op_3303_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3303_end_mask_0 = const()[name = tensor("op_3303_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3303_cast_fp16 = slice_by_index(begin = var_3303_begin_0, end = var_3303_end_0, end_mask = var_3303_end_mask_0, x = var_3261_cast_fp16)[name = tensor("op_3303_cast_fp16")]; tensor var_3304_begin_0 = const()[name = tensor("op_3304_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3304_end_0 = const()[name = tensor("op_3304_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3304_end_mask_0 = const()[name = tensor("op_3304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3304_cast_fp16 = slice_by_index(begin = var_3304_begin_0, end = var_3304_end_0, end_mask = var_3304_end_mask_0, x = var_3261_cast_fp16)[name = tensor("op_3304_cast_fp16")]; tensor var_3305_begin_0 = const()[name = tensor("op_3305_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3305_end_0 = const()[name = tensor("op_3305_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3305_end_mask_0 = const()[name = tensor("op_3305_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3305_cast_fp16 = slice_by_index(begin = var_3305_begin_0, end = var_3305_end_0, end_mask = var_3305_end_mask_0, x = var_3261_cast_fp16)[name = tensor("op_3305_cast_fp16")]; tensor var_3306_begin_0 = const()[name = tensor("op_3306_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3306_end_0 = const()[name = tensor("op_3306_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3306_end_mask_0 = const()[name = tensor("op_3306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3306_cast_fp16 = slice_by_index(begin = var_3306_begin_0, end = var_3306_end_0, end_mask = var_3306_end_mask_0, x = var_3265_cast_fp16)[name = tensor("op_3306_cast_fp16")]; tensor var_3307_begin_0 = const()[name = tensor("op_3307_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3307_end_0 = const()[name = tensor("op_3307_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3307_end_mask_0 = const()[name = tensor("op_3307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3307_cast_fp16 = slice_by_index(begin = var_3307_begin_0, end = var_3307_end_0, end_mask = var_3307_end_mask_0, x = var_3265_cast_fp16)[name = tensor("op_3307_cast_fp16")]; tensor var_3308_begin_0 = const()[name = tensor("op_3308_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3308_end_0 = const()[name = tensor("op_3308_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3308_end_mask_0 = const()[name = tensor("op_3308_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3308_cast_fp16 = slice_by_index(begin = var_3308_begin_0, end = var_3308_end_0, end_mask = var_3308_end_mask_0, x = var_3265_cast_fp16)[name = tensor("op_3308_cast_fp16")]; tensor var_3309_begin_0 = const()[name = tensor("op_3309_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3309_end_0 = const()[name = tensor("op_3309_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3309_end_mask_0 = const()[name = tensor("op_3309_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3309_cast_fp16 = slice_by_index(begin = var_3309_begin_0, end = var_3309_end_0, end_mask = var_3309_end_mask_0, x = var_3265_cast_fp16)[name = tensor("op_3309_cast_fp16")]; tensor var_3310_begin_0 = const()[name = tensor("op_3310_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3310_end_0 = const()[name = tensor("op_3310_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3310_end_mask_0 = const()[name = tensor("op_3310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3310_cast_fp16 = slice_by_index(begin = var_3310_begin_0, end = var_3310_end_0, end_mask = var_3310_end_mask_0, x = var_3265_cast_fp16)[name = tensor("op_3310_cast_fp16")]; tensor var_3311_begin_0 = const()[name = tensor("op_3311_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3311_end_0 = const()[name = tensor("op_3311_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3311_end_mask_0 = const()[name = tensor("op_3311_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3311_cast_fp16 = slice_by_index(begin = var_3311_begin_0, end = var_3311_end_0, end_mask = var_3311_end_mask_0, x = var_3265_cast_fp16)[name = tensor("op_3311_cast_fp16")]; tensor var_3312_begin_0 = const()[name = tensor("op_3312_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3312_end_0 = const()[name = tensor("op_3312_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3312_end_mask_0 = const()[name = tensor("op_3312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3312_cast_fp16 = slice_by_index(begin = var_3312_begin_0, end = var_3312_end_0, end_mask = var_3312_end_mask_0, x = var_3269_cast_fp16)[name = tensor("op_3312_cast_fp16")]; tensor var_3313_begin_0 = const()[name = tensor("op_3313_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3313_end_0 = const()[name = tensor("op_3313_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3313_end_mask_0 = const()[name = tensor("op_3313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3313_cast_fp16 = slice_by_index(begin = var_3313_begin_0, end = var_3313_end_0, end_mask = var_3313_end_mask_0, x = var_3269_cast_fp16)[name = tensor("op_3313_cast_fp16")]; tensor var_3314_begin_0 = const()[name = tensor("op_3314_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3314_end_0 = const()[name = tensor("op_3314_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3314_end_mask_0 = const()[name = tensor("op_3314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3314_cast_fp16 = slice_by_index(begin = var_3314_begin_0, end = var_3314_end_0, end_mask = var_3314_end_mask_0, x = var_3269_cast_fp16)[name = tensor("op_3314_cast_fp16")]; tensor var_3315_begin_0 = const()[name = tensor("op_3315_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3315_end_0 = const()[name = tensor("op_3315_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3315_end_mask_0 = const()[name = tensor("op_3315_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3315_cast_fp16 = slice_by_index(begin = var_3315_begin_0, end = var_3315_end_0, end_mask = var_3315_end_mask_0, x = var_3269_cast_fp16)[name = tensor("op_3315_cast_fp16")]; tensor var_3316_begin_0 = const()[name = tensor("op_3316_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3316_end_0 = const()[name = tensor("op_3316_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3316_end_mask_0 = const()[name = tensor("op_3316_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3316_cast_fp16 = slice_by_index(begin = var_3316_begin_0, end = var_3316_end_0, end_mask = var_3316_end_mask_0, x = var_3269_cast_fp16)[name = tensor("op_3316_cast_fp16")]; tensor var_3317_begin_0 = const()[name = tensor("op_3317_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3317_end_0 = const()[name = tensor("op_3317_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3317_end_mask_0 = const()[name = tensor("op_3317_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3317_cast_fp16 = slice_by_index(begin = var_3317_begin_0, end = var_3317_end_0, end_mask = var_3317_end_mask_0, x = var_3269_cast_fp16)[name = tensor("op_3317_cast_fp16")]; tensor var_3318_begin_0 = const()[name = tensor("op_3318_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3318_end_0 = const()[name = tensor("op_3318_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3318_end_mask_0 = const()[name = tensor("op_3318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3318_cast_fp16 = slice_by_index(begin = var_3318_begin_0, end = var_3318_end_0, end_mask = var_3318_end_mask_0, x = var_3273_cast_fp16)[name = tensor("op_3318_cast_fp16")]; tensor var_3319_begin_0 = const()[name = tensor("op_3319_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3319_end_0 = const()[name = tensor("op_3319_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3319_end_mask_0 = const()[name = tensor("op_3319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3319_cast_fp16 = slice_by_index(begin = var_3319_begin_0, end = var_3319_end_0, end_mask = var_3319_end_mask_0, x = var_3273_cast_fp16)[name = tensor("op_3319_cast_fp16")]; tensor var_3320_begin_0 = const()[name = tensor("op_3320_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3320_end_0 = const()[name = tensor("op_3320_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3320_end_mask_0 = const()[name = tensor("op_3320_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3320_cast_fp16 = slice_by_index(begin = var_3320_begin_0, end = var_3320_end_0, end_mask = var_3320_end_mask_0, x = var_3273_cast_fp16)[name = tensor("op_3320_cast_fp16")]; tensor var_3321_begin_0 = const()[name = tensor("op_3321_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3321_end_0 = const()[name = tensor("op_3321_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3321_end_mask_0 = const()[name = tensor("op_3321_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3321_cast_fp16 = slice_by_index(begin = var_3321_begin_0, end = var_3321_end_0, end_mask = var_3321_end_mask_0, x = var_3273_cast_fp16)[name = tensor("op_3321_cast_fp16")]; tensor var_3322_begin_0 = const()[name = tensor("op_3322_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3322_end_0 = const()[name = tensor("op_3322_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3322_end_mask_0 = const()[name = tensor("op_3322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3322_cast_fp16 = slice_by_index(begin = var_3322_begin_0, end = var_3322_end_0, end_mask = var_3322_end_mask_0, x = var_3273_cast_fp16)[name = tensor("op_3322_cast_fp16")]; tensor var_3323_begin_0 = const()[name = tensor("op_3323_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3323_end_0 = const()[name = tensor("op_3323_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3323_end_mask_0 = const()[name = tensor("op_3323_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3323_cast_fp16 = slice_by_index(begin = var_3323_begin_0, end = var_3323_end_0, end_mask = var_3323_end_mask_0, x = var_3273_cast_fp16)[name = tensor("op_3323_cast_fp16")]; tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_3328_begin_0 = const()[name = tensor("op_3328_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3328_end_0 = const()[name = tensor("op_3328_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_3328_end_mask_0 = const()[name = tensor("op_3328_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; tensor var_3328_cast_fp16 = slice_by_index(begin = var_3328_begin_0, end = var_3328_end_0, end_mask = var_3328_end_mask_0, x = k_cast_fp16)[name = tensor("op_3328_cast_fp16")]; tensor var_3332_begin_0 = const()[name = tensor("op_3332_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_3332_end_0 = const()[name = tensor("op_3332_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_3332_end_mask_0 = const()[name = tensor("op_3332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3332_cast_fp16 = slice_by_index(begin = var_3332_begin_0, end = var_3332_end_0, end_mask = var_3332_end_mask_0, x = k_cast_fp16)[name = tensor("op_3332_cast_fp16")]; tensor var_3336_begin_0 = const()[name = tensor("op_3336_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_3336_end_0 = const()[name = tensor("op_3336_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_3336_end_mask_0 = const()[name = tensor("op_3336_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3336_cast_fp16 = slice_by_index(begin = var_3336_begin_0, end = var_3336_end_0, end_mask = var_3336_end_mask_0, x = k_cast_fp16)[name = tensor("op_3336_cast_fp16")]; tensor var_3340_begin_0 = const()[name = tensor("op_3340_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_3340_end_0 = const()[name = tensor("op_3340_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_3340_end_mask_0 = const()[name = tensor("op_3340_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3340_cast_fp16 = slice_by_index(begin = var_3340_begin_0, end = var_3340_end_0, end_mask = var_3340_end_mask_0, x = k_cast_fp16)[name = tensor("op_3340_cast_fp16")]; tensor var_3344_begin_0 = const()[name = tensor("op_3344_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3344_end_0 = const()[name = tensor("op_3344_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_3344_end_mask_0 = const()[name = tensor("op_3344_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3344_cast_fp16 = slice_by_index(begin = var_3344_begin_0, end = var_3344_end_0, end_mask = var_3344_end_mask_0, x = k_cast_fp16)[name = tensor("op_3344_cast_fp16")]; tensor var_3348_begin_0 = const()[name = tensor("op_3348_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_3348_end_0 = const()[name = tensor("op_3348_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_3348_end_mask_0 = const()[name = tensor("op_3348_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3348_cast_fp16 = slice_by_index(begin = var_3348_begin_0, end = var_3348_end_0, end_mask = var_3348_end_mask_0, x = k_cast_fp16)[name = tensor("op_3348_cast_fp16")]; tensor var_3352_begin_0 = const()[name = tensor("op_3352_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_3352_end_0 = const()[name = tensor("op_3352_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_3352_end_mask_0 = const()[name = tensor("op_3352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3352_cast_fp16 = slice_by_index(begin = var_3352_begin_0, end = var_3352_end_0, end_mask = var_3352_end_mask_0, x = k_cast_fp16)[name = tensor("op_3352_cast_fp16")]; tensor var_3356_begin_0 = const()[name = tensor("op_3356_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_3356_end_0 = const()[name = tensor("op_3356_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_3356_end_mask_0 = const()[name = tensor("op_3356_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3356_cast_fp16 = slice_by_index(begin = var_3356_begin_0, end = var_3356_end_0, end_mask = var_3356_end_mask_0, x = k_cast_fp16)[name = tensor("op_3356_cast_fp16")]; tensor var_3358_begin_0 = const()[name = tensor("op_3358_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3358_end_0 = const()[name = tensor("op_3358_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3358_end_mask_0 = const()[name = tensor("op_3358_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3358_cast_fp16 = slice_by_index(begin = var_3358_begin_0, end = var_3358_end_0, end_mask = var_3358_end_mask_0, x = value_cast_fp16)[name = tensor("op_3358_cast_fp16")]; tensor var_3362_begin_0 = const()[name = tensor("op_3362_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_3362_end_0 = const()[name = tensor("op_3362_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_3362_end_mask_0 = const()[name = tensor("op_3362_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3362_cast_fp16 = slice_by_index(begin = var_3362_begin_0, end = var_3362_end_0, end_mask = var_3362_end_mask_0, x = value_cast_fp16)[name = tensor("op_3362_cast_fp16")]; tensor var_3366_begin_0 = const()[name = tensor("op_3366_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_3366_end_0 = const()[name = tensor("op_3366_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_3366_end_mask_0 = const()[name = tensor("op_3366_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3366_cast_fp16 = slice_by_index(begin = var_3366_begin_0, end = var_3366_end_0, end_mask = var_3366_end_mask_0, x = value_cast_fp16)[name = tensor("op_3366_cast_fp16")]; tensor var_3370_begin_0 = const()[name = tensor("op_3370_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_3370_end_0 = const()[name = tensor("op_3370_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_3370_end_mask_0 = const()[name = tensor("op_3370_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3370_cast_fp16 = slice_by_index(begin = var_3370_begin_0, end = var_3370_end_0, end_mask = var_3370_end_mask_0, x = value_cast_fp16)[name = tensor("op_3370_cast_fp16")]; tensor var_3374_begin_0 = const()[name = tensor("op_3374_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_3374_end_0 = const()[name = tensor("op_3374_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_3374_end_mask_0 = const()[name = tensor("op_3374_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3374_cast_fp16 = slice_by_index(begin = var_3374_begin_0, end = var_3374_end_0, end_mask = var_3374_end_mask_0, x = value_cast_fp16)[name = tensor("op_3374_cast_fp16")]; tensor var_3378_begin_0 = const()[name = tensor("op_3378_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3378_end_0 = const()[name = tensor("op_3378_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_3378_end_mask_0 = const()[name = tensor("op_3378_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3378_cast_fp16 = slice_by_index(begin = var_3378_begin_0, end = var_3378_end_0, end_mask = var_3378_end_mask_0, x = value_cast_fp16)[name = tensor("op_3378_cast_fp16")]; tensor var_3382_begin_0 = const()[name = tensor("op_3382_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_3382_end_0 = const()[name = tensor("op_3382_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_3382_end_mask_0 = const()[name = tensor("op_3382_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3382_cast_fp16 = slice_by_index(begin = var_3382_begin_0, end = var_3382_end_0, end_mask = var_3382_end_mask_0, x = value_cast_fp16)[name = tensor("op_3382_cast_fp16")]; tensor var_3386_begin_0 = const()[name = tensor("op_3386_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_3386_end_0 = const()[name = tensor("op_3386_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_3386_end_mask_0 = const()[name = tensor("op_3386_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3386_cast_fp16 = slice_by_index(begin = var_3386_begin_0, end = var_3386_end_0, end_mask = var_3386_end_mask_0, x = value_cast_fp16)[name = tensor("op_3386_cast_fp16")]; tensor _SplitHeadsQ__mh_w_481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_481_equation_0, values = (var_3328_cast_fp16, var_3276_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_483_equation_0, values = (var_3328_cast_fp16, var_3277_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_485_equation_0, values = (var_3328_cast_fp16, var_3278_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_487_equation_0, values = (var_3328_cast_fp16, var_3279_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_489_equation_0, values = (var_3328_cast_fp16, var_3280_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_491_equation_0, values = (var_3328_cast_fp16, var_3281_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_493_equation_0, values = (var_3332_cast_fp16, var_3282_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_495_equation_0, values = (var_3332_cast_fp16, var_3283_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_497_equation_0, values = (var_3332_cast_fp16, var_3284_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_499_equation_0, values = (var_3332_cast_fp16, var_3285_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_501_equation_0, values = (var_3332_cast_fp16, var_3286_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_503_equation_0, values = (var_3332_cast_fp16, var_3287_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_505_equation_0, values = (var_3336_cast_fp16, var_3288_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_507_equation_0, values = (var_3336_cast_fp16, var_3289_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_509_equation_0, values = (var_3336_cast_fp16, var_3290_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_511_equation_0, values = (var_3336_cast_fp16, var_3291_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_513_equation_0, values = (var_3336_cast_fp16, var_3292_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_515_equation_0, values = (var_3336_cast_fp16, var_3293_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_517_equation_0, values = (var_3340_cast_fp16, var_3294_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_519_equation_0, values = (var_3340_cast_fp16, var_3295_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_521_equation_0, values = (var_3340_cast_fp16, var_3296_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_523_equation_0, values = (var_3340_cast_fp16, var_3297_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_525_equation_0, values = (var_3340_cast_fp16, var_3298_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_527_equation_0, values = (var_3340_cast_fp16, var_3299_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_529_equation_0, values = (var_3344_cast_fp16, var_3300_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_531_equation_0, values = (var_3344_cast_fp16, var_3301_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_533_equation_0, values = (var_3344_cast_fp16, var_3302_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_535_equation_0, values = (var_3344_cast_fp16, var_3303_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_537_equation_0, values = (var_3344_cast_fp16, var_3304_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_539_equation_0, values = (var_3344_cast_fp16, var_3305_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_541_equation_0, values = (var_3348_cast_fp16, var_3306_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_543_equation_0, values = (var_3348_cast_fp16, var_3307_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_545_equation_0, values = (var_3348_cast_fp16, var_3308_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_547_equation_0, values = (var_3348_cast_fp16, var_3309_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_549_equation_0, values = (var_3348_cast_fp16, var_3310_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_551_equation_0, values = (var_3348_cast_fp16, var_3311_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_553_equation_0, values = (var_3352_cast_fp16, var_3312_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_555_equation_0, values = (var_3352_cast_fp16, var_3313_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_557_equation_0, values = (var_3352_cast_fp16, var_3314_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_559_equation_0, values = (var_3352_cast_fp16, var_3315_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_561_equation_0, values = (var_3352_cast_fp16, var_3316_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_563_equation_0, values = (var_3352_cast_fp16, var_3317_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_565_equation_0, values = (var_3356_cast_fp16, var_3318_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_567_equation_0, values = (var_3356_cast_fp16, var_3319_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_569_equation_0, values = (var_3356_cast_fp16, var_3320_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_571_equation_0, values = (var_3356_cast_fp16, var_3321_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_573_equation_0, values = (var_3356_cast_fp16, var_3322_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_equation_0, values = (var_3356_cast_fp16, var_3323_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_cast_fp16")]; tensor var_3485_to_fp16 = const()[name = tensor("op_3485_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_481_cast_fp16, y = var_3485_to_fp16)[name = tensor("aw_chunk_481_cast_fp16")]; tensor var_3487_to_fp16 = const()[name = tensor("op_3487_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_483_cast_fp16, y = var_3487_to_fp16)[name = tensor("aw_chunk_483_cast_fp16")]; tensor var_3489_to_fp16 = const()[name = tensor("op_3489_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_485_cast_fp16, y = var_3489_to_fp16)[name = tensor("aw_chunk_485_cast_fp16")]; tensor var_3491_to_fp16 = const()[name = tensor("op_3491_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_487_cast_fp16, y = var_3491_to_fp16)[name = tensor("aw_chunk_487_cast_fp16")]; tensor var_3493_to_fp16 = const()[name = tensor("op_3493_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_489_cast_fp16, y = var_3493_to_fp16)[name = tensor("aw_chunk_489_cast_fp16")]; tensor var_3495_to_fp16 = const()[name = tensor("op_3495_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_491_cast_fp16, y = var_3495_to_fp16)[name = tensor("aw_chunk_491_cast_fp16")]; tensor var_3497_to_fp16 = const()[name = tensor("op_3497_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_493_cast_fp16, y = var_3497_to_fp16)[name = tensor("aw_chunk_493_cast_fp16")]; tensor var_3499_to_fp16 = const()[name = tensor("op_3499_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_495_cast_fp16, y = var_3499_to_fp16)[name = tensor("aw_chunk_495_cast_fp16")]; tensor var_3501_to_fp16 = const()[name = tensor("op_3501_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_497_cast_fp16, y = var_3501_to_fp16)[name = tensor("aw_chunk_497_cast_fp16")]; tensor var_3503_to_fp16 = const()[name = tensor("op_3503_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_499_cast_fp16, y = var_3503_to_fp16)[name = tensor("aw_chunk_499_cast_fp16")]; tensor var_3505_to_fp16 = const()[name = tensor("op_3505_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_501_cast_fp16, y = var_3505_to_fp16)[name = tensor("aw_chunk_501_cast_fp16")]; tensor var_3507_to_fp16 = const()[name = tensor("op_3507_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_503_cast_fp16, y = var_3507_to_fp16)[name = tensor("aw_chunk_503_cast_fp16")]; tensor var_3509_to_fp16 = const()[name = tensor("op_3509_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_505_cast_fp16, y = var_3509_to_fp16)[name = tensor("aw_chunk_505_cast_fp16")]; tensor var_3511_to_fp16 = const()[name = tensor("op_3511_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_507_cast_fp16, y = var_3511_to_fp16)[name = tensor("aw_chunk_507_cast_fp16")]; tensor var_3513_to_fp16 = const()[name = tensor("op_3513_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_509_cast_fp16, y = var_3513_to_fp16)[name = tensor("aw_chunk_509_cast_fp16")]; tensor var_3515_to_fp16 = const()[name = tensor("op_3515_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_511_cast_fp16, y = var_3515_to_fp16)[name = tensor("aw_chunk_511_cast_fp16")]; tensor var_3517_to_fp16 = const()[name = tensor("op_3517_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_513_cast_fp16, y = var_3517_to_fp16)[name = tensor("aw_chunk_513_cast_fp16")]; tensor var_3519_to_fp16 = const()[name = tensor("op_3519_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_515_cast_fp16, y = var_3519_to_fp16)[name = tensor("aw_chunk_515_cast_fp16")]; tensor var_3521_to_fp16 = const()[name = tensor("op_3521_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_517_cast_fp16, y = var_3521_to_fp16)[name = tensor("aw_chunk_517_cast_fp16")]; tensor var_3523_to_fp16 = const()[name = tensor("op_3523_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_519_cast_fp16, y = var_3523_to_fp16)[name = tensor("aw_chunk_519_cast_fp16")]; tensor var_3525_to_fp16 = const()[name = tensor("op_3525_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_521_cast_fp16, y = var_3525_to_fp16)[name = tensor("aw_chunk_521_cast_fp16")]; tensor var_3527_to_fp16 = const()[name = tensor("op_3527_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_523_cast_fp16, y = var_3527_to_fp16)[name = tensor("aw_chunk_523_cast_fp16")]; tensor var_3529_to_fp16 = const()[name = tensor("op_3529_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_525_cast_fp16, y = var_3529_to_fp16)[name = tensor("aw_chunk_525_cast_fp16")]; tensor var_3531_to_fp16 = const()[name = tensor("op_3531_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_527_cast_fp16, y = var_3531_to_fp16)[name = tensor("aw_chunk_527_cast_fp16")]; tensor var_3533_to_fp16 = const()[name = tensor("op_3533_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_529_cast_fp16, y = var_3533_to_fp16)[name = tensor("aw_chunk_529_cast_fp16")]; tensor var_3535_to_fp16 = const()[name = tensor("op_3535_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_531_cast_fp16, y = var_3535_to_fp16)[name = tensor("aw_chunk_531_cast_fp16")]; tensor var_3537_to_fp16 = const()[name = tensor("op_3537_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_533_cast_fp16, y = var_3537_to_fp16)[name = tensor("aw_chunk_533_cast_fp16")]; tensor var_3539_to_fp16 = const()[name = tensor("op_3539_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_535_cast_fp16, y = var_3539_to_fp16)[name = tensor("aw_chunk_535_cast_fp16")]; tensor var_3541_to_fp16 = const()[name = tensor("op_3541_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_537_cast_fp16, y = var_3541_to_fp16)[name = tensor("aw_chunk_537_cast_fp16")]; tensor var_3543_to_fp16 = const()[name = tensor("op_3543_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_539_cast_fp16, y = var_3543_to_fp16)[name = tensor("aw_chunk_539_cast_fp16")]; tensor var_3545_to_fp16 = const()[name = tensor("op_3545_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_541_cast_fp16, y = var_3545_to_fp16)[name = tensor("aw_chunk_541_cast_fp16")]; tensor var_3547_to_fp16 = const()[name = tensor("op_3547_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_543_cast_fp16, y = var_3547_to_fp16)[name = tensor("aw_chunk_543_cast_fp16")]; tensor var_3549_to_fp16 = const()[name = tensor("op_3549_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_545_cast_fp16, y = var_3549_to_fp16)[name = tensor("aw_chunk_545_cast_fp16")]; tensor var_3551_to_fp16 = const()[name = tensor("op_3551_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_547_cast_fp16, y = var_3551_to_fp16)[name = tensor("aw_chunk_547_cast_fp16")]; tensor var_3553_to_fp16 = const()[name = tensor("op_3553_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_549_cast_fp16, y = var_3553_to_fp16)[name = tensor("aw_chunk_549_cast_fp16")]; tensor var_3555_to_fp16 = const()[name = tensor("op_3555_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_551_cast_fp16, y = var_3555_to_fp16)[name = tensor("aw_chunk_551_cast_fp16")]; tensor var_3557_to_fp16 = const()[name = tensor("op_3557_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_553_cast_fp16, y = var_3557_to_fp16)[name = tensor("aw_chunk_553_cast_fp16")]; tensor var_3559_to_fp16 = const()[name = tensor("op_3559_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_555_cast_fp16, y = var_3559_to_fp16)[name = tensor("aw_chunk_555_cast_fp16")]; tensor var_3561_to_fp16 = const()[name = tensor("op_3561_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_557_cast_fp16, y = var_3561_to_fp16)[name = tensor("aw_chunk_557_cast_fp16")]; tensor var_3563_to_fp16 = const()[name = tensor("op_3563_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_559_cast_fp16, y = var_3563_to_fp16)[name = tensor("aw_chunk_559_cast_fp16")]; tensor var_3565_to_fp16 = const()[name = tensor("op_3565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_561_cast_fp16, y = var_3565_to_fp16)[name = tensor("aw_chunk_561_cast_fp16")]; tensor var_3567_to_fp16 = const()[name = tensor("op_3567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_563_cast_fp16, y = var_3567_to_fp16)[name = tensor("aw_chunk_563_cast_fp16")]; tensor var_3569_to_fp16 = const()[name = tensor("op_3569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_565_cast_fp16, y = var_3569_to_fp16)[name = tensor("aw_chunk_565_cast_fp16")]; tensor var_3571_to_fp16 = const()[name = tensor("op_3571_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_567_cast_fp16, y = var_3571_to_fp16)[name = tensor("aw_chunk_567_cast_fp16")]; tensor var_3573_to_fp16 = const()[name = tensor("op_3573_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_569_cast_fp16, y = var_3573_to_fp16)[name = tensor("aw_chunk_569_cast_fp16")]; tensor var_3575_to_fp16 = const()[name = tensor("op_3575_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_571_cast_fp16, y = var_3575_to_fp16)[name = tensor("aw_chunk_571_cast_fp16")]; tensor var_3577_to_fp16 = const()[name = tensor("op_3577_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_573_cast_fp16, y = var_3577_to_fp16)[name = tensor("aw_chunk_573_cast_fp16")]; tensor var_3579_to_fp16 = const()[name = tensor("op_3579_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_cast_fp16, y = var_3579_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; tensor var_3581_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_481_cast_fp16)[name = tensor("op_3581_cast_fp16")]; tensor var_3582_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_483_cast_fp16)[name = tensor("op_3582_cast_fp16")]; tensor var_3583_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_485_cast_fp16)[name = tensor("op_3583_cast_fp16")]; tensor var_3584_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_487_cast_fp16)[name = tensor("op_3584_cast_fp16")]; tensor var_3585_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_489_cast_fp16)[name = tensor("op_3585_cast_fp16")]; tensor var_3586_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_491_cast_fp16)[name = tensor("op_3586_cast_fp16")]; tensor var_3587_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_493_cast_fp16)[name = tensor("op_3587_cast_fp16")]; tensor var_3588_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_495_cast_fp16)[name = tensor("op_3588_cast_fp16")]; tensor var_3589_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_497_cast_fp16)[name = tensor("op_3589_cast_fp16")]; tensor var_3590_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_499_cast_fp16)[name = tensor("op_3590_cast_fp16")]; tensor var_3591_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_501_cast_fp16)[name = tensor("op_3591_cast_fp16")]; tensor var_3592_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_503_cast_fp16)[name = tensor("op_3592_cast_fp16")]; tensor var_3593_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_505_cast_fp16)[name = tensor("op_3593_cast_fp16")]; tensor var_3594_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_507_cast_fp16)[name = tensor("op_3594_cast_fp16")]; tensor var_3595_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_509_cast_fp16)[name = tensor("op_3595_cast_fp16")]; tensor var_3596_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_511_cast_fp16)[name = tensor("op_3596_cast_fp16")]; tensor var_3597_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_513_cast_fp16)[name = tensor("op_3597_cast_fp16")]; tensor var_3598_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_515_cast_fp16)[name = tensor("op_3598_cast_fp16")]; tensor var_3599_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_517_cast_fp16)[name = tensor("op_3599_cast_fp16")]; tensor var_3600_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_519_cast_fp16)[name = tensor("op_3600_cast_fp16")]; tensor var_3601_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_521_cast_fp16)[name = tensor("op_3601_cast_fp16")]; tensor var_3602_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_523_cast_fp16)[name = tensor("op_3602_cast_fp16")]; tensor var_3603_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_525_cast_fp16)[name = tensor("op_3603_cast_fp16")]; tensor var_3604_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_527_cast_fp16)[name = tensor("op_3604_cast_fp16")]; tensor var_3605_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_529_cast_fp16)[name = tensor("op_3605_cast_fp16")]; tensor var_3606_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_531_cast_fp16)[name = tensor("op_3606_cast_fp16")]; tensor var_3607_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_533_cast_fp16)[name = tensor("op_3607_cast_fp16")]; tensor var_3608_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_535_cast_fp16)[name = tensor("op_3608_cast_fp16")]; tensor var_3609_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_537_cast_fp16)[name = tensor("op_3609_cast_fp16")]; tensor var_3610_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_539_cast_fp16)[name = tensor("op_3610_cast_fp16")]; tensor var_3611_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_541_cast_fp16)[name = tensor("op_3611_cast_fp16")]; tensor var_3612_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_543_cast_fp16)[name = tensor("op_3612_cast_fp16")]; tensor var_3613_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_545_cast_fp16)[name = tensor("op_3613_cast_fp16")]; tensor var_3614_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_547_cast_fp16)[name = tensor("op_3614_cast_fp16")]; tensor var_3615_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_549_cast_fp16)[name = tensor("op_3615_cast_fp16")]; tensor var_3616_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_551_cast_fp16)[name = tensor("op_3616_cast_fp16")]; tensor var_3617_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_553_cast_fp16)[name = tensor("op_3617_cast_fp16")]; tensor var_3618_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_555_cast_fp16)[name = tensor("op_3618_cast_fp16")]; tensor var_3619_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_557_cast_fp16)[name = tensor("op_3619_cast_fp16")]; tensor var_3620_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_559_cast_fp16)[name = tensor("op_3620_cast_fp16")]; tensor var_3621_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_561_cast_fp16)[name = tensor("op_3621_cast_fp16")]; tensor var_3622_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_563_cast_fp16)[name = tensor("op_3622_cast_fp16")]; tensor var_3623_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_565_cast_fp16)[name = tensor("op_3623_cast_fp16")]; tensor var_3624_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_567_cast_fp16)[name = tensor("op_3624_cast_fp16")]; tensor var_3625_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_569_cast_fp16)[name = tensor("op_3625_cast_fp16")]; tensor var_3626_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_571_cast_fp16)[name = tensor("op_3626_cast_fp16")]; tensor var_3627_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_573_cast_fp16)[name = tensor("op_3627_cast_fp16")]; tensor var_3628_cast_fp16 = softmax(axis = var_3193, x = aw_chunk_cast_fp16)[name = tensor("op_3628_cast_fp16")]; tensor var_3630_equation_0 = const()[name = tensor("op_3630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3630_cast_fp16 = einsum(equation = var_3630_equation_0, values = (var_3358_cast_fp16, var_3581_cast_fp16))[name = tensor("op_3630_cast_fp16")]; tensor var_3632_equation_0 = const()[name = tensor("op_3632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3632_cast_fp16 = einsum(equation = var_3632_equation_0, values = (var_3358_cast_fp16, var_3582_cast_fp16))[name = tensor("op_3632_cast_fp16")]; tensor var_3634_equation_0 = const()[name = tensor("op_3634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3634_cast_fp16 = einsum(equation = var_3634_equation_0, values = (var_3358_cast_fp16, var_3583_cast_fp16))[name = tensor("op_3634_cast_fp16")]; tensor var_3636_equation_0 = const()[name = tensor("op_3636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3636_cast_fp16 = einsum(equation = var_3636_equation_0, values = (var_3358_cast_fp16, var_3584_cast_fp16))[name = tensor("op_3636_cast_fp16")]; tensor var_3638_equation_0 = const()[name = tensor("op_3638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3638_cast_fp16 = einsum(equation = var_3638_equation_0, values = (var_3358_cast_fp16, var_3585_cast_fp16))[name = tensor("op_3638_cast_fp16")]; tensor var_3640_equation_0 = const()[name = tensor("op_3640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3640_cast_fp16 = einsum(equation = var_3640_equation_0, values = (var_3358_cast_fp16, var_3586_cast_fp16))[name = tensor("op_3640_cast_fp16")]; tensor var_3642_equation_0 = const()[name = tensor("op_3642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3642_cast_fp16 = einsum(equation = var_3642_equation_0, values = (var_3362_cast_fp16, var_3587_cast_fp16))[name = tensor("op_3642_cast_fp16")]; tensor var_3644_equation_0 = const()[name = tensor("op_3644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3644_cast_fp16 = einsum(equation = var_3644_equation_0, values = (var_3362_cast_fp16, var_3588_cast_fp16))[name = tensor("op_3644_cast_fp16")]; tensor var_3646_equation_0 = const()[name = tensor("op_3646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3646_cast_fp16 = einsum(equation = var_3646_equation_0, values = (var_3362_cast_fp16, var_3589_cast_fp16))[name = tensor("op_3646_cast_fp16")]; tensor var_3648_equation_0 = const()[name = tensor("op_3648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3648_cast_fp16 = einsum(equation = var_3648_equation_0, values = (var_3362_cast_fp16, var_3590_cast_fp16))[name = tensor("op_3648_cast_fp16")]; tensor var_3650_equation_0 = const()[name = tensor("op_3650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3650_cast_fp16 = einsum(equation = var_3650_equation_0, values = (var_3362_cast_fp16, var_3591_cast_fp16))[name = tensor("op_3650_cast_fp16")]; tensor var_3652_equation_0 = const()[name = tensor("op_3652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3652_cast_fp16 = einsum(equation = var_3652_equation_0, values = (var_3362_cast_fp16, var_3592_cast_fp16))[name = tensor("op_3652_cast_fp16")]; tensor var_3654_equation_0 = const()[name = tensor("op_3654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3654_cast_fp16 = einsum(equation = var_3654_equation_0, values = (var_3366_cast_fp16, var_3593_cast_fp16))[name = tensor("op_3654_cast_fp16")]; tensor var_3656_equation_0 = const()[name = tensor("op_3656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3656_cast_fp16 = einsum(equation = var_3656_equation_0, values = (var_3366_cast_fp16, var_3594_cast_fp16))[name = tensor("op_3656_cast_fp16")]; tensor var_3658_equation_0 = const()[name = tensor("op_3658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3658_cast_fp16 = einsum(equation = var_3658_equation_0, values = (var_3366_cast_fp16, var_3595_cast_fp16))[name = tensor("op_3658_cast_fp16")]; tensor var_3660_equation_0 = const()[name = tensor("op_3660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3660_cast_fp16 = einsum(equation = var_3660_equation_0, values = (var_3366_cast_fp16, var_3596_cast_fp16))[name = tensor("op_3660_cast_fp16")]; tensor var_3662_equation_0 = const()[name = tensor("op_3662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3662_cast_fp16 = einsum(equation = var_3662_equation_0, values = (var_3366_cast_fp16, var_3597_cast_fp16))[name = tensor("op_3662_cast_fp16")]; tensor var_3664_equation_0 = const()[name = tensor("op_3664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3664_cast_fp16 = einsum(equation = var_3664_equation_0, values = (var_3366_cast_fp16, var_3598_cast_fp16))[name = tensor("op_3664_cast_fp16")]; tensor var_3666_equation_0 = const()[name = tensor("op_3666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3666_cast_fp16 = einsum(equation = var_3666_equation_0, values = (var_3370_cast_fp16, var_3599_cast_fp16))[name = tensor("op_3666_cast_fp16")]; tensor var_3668_equation_0 = const()[name = tensor("op_3668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3668_cast_fp16 = einsum(equation = var_3668_equation_0, values = (var_3370_cast_fp16, var_3600_cast_fp16))[name = tensor("op_3668_cast_fp16")]; tensor var_3670_equation_0 = const()[name = tensor("op_3670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3670_cast_fp16 = einsum(equation = var_3670_equation_0, values = (var_3370_cast_fp16, var_3601_cast_fp16))[name = tensor("op_3670_cast_fp16")]; tensor var_3672_equation_0 = const()[name = tensor("op_3672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3672_cast_fp16 = einsum(equation = var_3672_equation_0, values = (var_3370_cast_fp16, var_3602_cast_fp16))[name = tensor("op_3672_cast_fp16")]; tensor var_3674_equation_0 = const()[name = tensor("op_3674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3674_cast_fp16 = einsum(equation = var_3674_equation_0, values = (var_3370_cast_fp16, var_3603_cast_fp16))[name = tensor("op_3674_cast_fp16")]; tensor var_3676_equation_0 = const()[name = tensor("op_3676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3676_cast_fp16 = einsum(equation = var_3676_equation_0, values = (var_3370_cast_fp16, var_3604_cast_fp16))[name = tensor("op_3676_cast_fp16")]; tensor var_3678_equation_0 = const()[name = tensor("op_3678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3678_cast_fp16 = einsum(equation = var_3678_equation_0, values = (var_3374_cast_fp16, var_3605_cast_fp16))[name = tensor("op_3678_cast_fp16")]; tensor var_3680_equation_0 = const()[name = tensor("op_3680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3680_cast_fp16 = einsum(equation = var_3680_equation_0, values = (var_3374_cast_fp16, var_3606_cast_fp16))[name = tensor("op_3680_cast_fp16")]; tensor var_3682_equation_0 = const()[name = tensor("op_3682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3682_cast_fp16 = einsum(equation = var_3682_equation_0, values = (var_3374_cast_fp16, var_3607_cast_fp16))[name = tensor("op_3682_cast_fp16")]; tensor var_3684_equation_0 = const()[name = tensor("op_3684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3684_cast_fp16 = einsum(equation = var_3684_equation_0, values = (var_3374_cast_fp16, var_3608_cast_fp16))[name = tensor("op_3684_cast_fp16")]; tensor var_3686_equation_0 = const()[name = tensor("op_3686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3686_cast_fp16 = einsum(equation = var_3686_equation_0, values = (var_3374_cast_fp16, var_3609_cast_fp16))[name = tensor("op_3686_cast_fp16")]; tensor var_3688_equation_0 = const()[name = tensor("op_3688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3688_cast_fp16 = einsum(equation = var_3688_equation_0, values = (var_3374_cast_fp16, var_3610_cast_fp16))[name = tensor("op_3688_cast_fp16")]; tensor var_3690_equation_0 = const()[name = tensor("op_3690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3690_cast_fp16 = einsum(equation = var_3690_equation_0, values = (var_3378_cast_fp16, var_3611_cast_fp16))[name = tensor("op_3690_cast_fp16")]; tensor var_3692_equation_0 = const()[name = tensor("op_3692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3692_cast_fp16 = einsum(equation = var_3692_equation_0, values = (var_3378_cast_fp16, var_3612_cast_fp16))[name = tensor("op_3692_cast_fp16")]; tensor var_3694_equation_0 = const()[name = tensor("op_3694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3694_cast_fp16 = einsum(equation = var_3694_equation_0, values = (var_3378_cast_fp16, var_3613_cast_fp16))[name = tensor("op_3694_cast_fp16")]; tensor var_3696_equation_0 = const()[name = tensor("op_3696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3696_cast_fp16 = einsum(equation = var_3696_equation_0, values = (var_3378_cast_fp16, var_3614_cast_fp16))[name = tensor("op_3696_cast_fp16")]; tensor var_3698_equation_0 = const()[name = tensor("op_3698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3698_cast_fp16 = einsum(equation = var_3698_equation_0, values = (var_3378_cast_fp16, var_3615_cast_fp16))[name = tensor("op_3698_cast_fp16")]; tensor var_3700_equation_0 = const()[name = tensor("op_3700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3700_cast_fp16 = einsum(equation = var_3700_equation_0, values = (var_3378_cast_fp16, var_3616_cast_fp16))[name = tensor("op_3700_cast_fp16")]; tensor var_3702_equation_0 = const()[name = tensor("op_3702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3702_cast_fp16 = einsum(equation = var_3702_equation_0, values = (var_3382_cast_fp16, var_3617_cast_fp16))[name = tensor("op_3702_cast_fp16")]; tensor var_3704_equation_0 = const()[name = tensor("op_3704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3704_cast_fp16 = einsum(equation = var_3704_equation_0, values = (var_3382_cast_fp16, var_3618_cast_fp16))[name = tensor("op_3704_cast_fp16")]; tensor var_3706_equation_0 = const()[name = tensor("op_3706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3706_cast_fp16 = einsum(equation = var_3706_equation_0, values = (var_3382_cast_fp16, var_3619_cast_fp16))[name = tensor("op_3706_cast_fp16")]; tensor var_3708_equation_0 = const()[name = tensor("op_3708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3708_cast_fp16 = einsum(equation = var_3708_equation_0, values = (var_3382_cast_fp16, var_3620_cast_fp16))[name = tensor("op_3708_cast_fp16")]; tensor var_3710_equation_0 = const()[name = tensor("op_3710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3710_cast_fp16 = einsum(equation = var_3710_equation_0, values = (var_3382_cast_fp16, var_3621_cast_fp16))[name = tensor("op_3710_cast_fp16")]; tensor var_3712_equation_0 = const()[name = tensor("op_3712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3712_cast_fp16 = einsum(equation = var_3712_equation_0, values = (var_3382_cast_fp16, var_3622_cast_fp16))[name = tensor("op_3712_cast_fp16")]; tensor var_3714_equation_0 = const()[name = tensor("op_3714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3714_cast_fp16 = einsum(equation = var_3714_equation_0, values = (var_3386_cast_fp16, var_3623_cast_fp16))[name = tensor("op_3714_cast_fp16")]; tensor var_3716_equation_0 = const()[name = tensor("op_3716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3716_cast_fp16 = einsum(equation = var_3716_equation_0, values = (var_3386_cast_fp16, var_3624_cast_fp16))[name = tensor("op_3716_cast_fp16")]; tensor var_3718_equation_0 = const()[name = tensor("op_3718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3718_cast_fp16 = einsum(equation = var_3718_equation_0, values = (var_3386_cast_fp16, var_3625_cast_fp16))[name = tensor("op_3718_cast_fp16")]; tensor var_3720_equation_0 = const()[name = tensor("op_3720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3720_cast_fp16 = einsum(equation = var_3720_equation_0, values = (var_3386_cast_fp16, var_3626_cast_fp16))[name = tensor("op_3720_cast_fp16")]; tensor var_3722_equation_0 = const()[name = tensor("op_3722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3722_cast_fp16 = einsum(equation = var_3722_equation_0, values = (var_3386_cast_fp16, var_3627_cast_fp16))[name = tensor("op_3722_cast_fp16")]; tensor var_3724_equation_0 = const()[name = tensor("op_3724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3724_cast_fp16 = einsum(equation = var_3724_equation_0, values = (var_3386_cast_fp16, var_3628_cast_fp16))[name = tensor("op_3724_cast_fp16")]; tensor var_3726_interleave_0 = const()[name = tensor("op_3726_interleave_0"), val = tensor(false)]; tensor var_3726_cast_fp16 = concat(axis = var_3180, interleave = var_3726_interleave_0, values = (var_3630_cast_fp16, var_3632_cast_fp16, var_3634_cast_fp16, var_3636_cast_fp16, var_3638_cast_fp16, var_3640_cast_fp16))[name = tensor("op_3726_cast_fp16")]; tensor var_3728_interleave_0 = const()[name = tensor("op_3728_interleave_0"), val = tensor(false)]; tensor var_3728_cast_fp16 = concat(axis = var_3180, interleave = var_3728_interleave_0, values = (var_3642_cast_fp16, var_3644_cast_fp16, var_3646_cast_fp16, var_3648_cast_fp16, var_3650_cast_fp16, var_3652_cast_fp16))[name = tensor("op_3728_cast_fp16")]; tensor var_3730_interleave_0 = const()[name = tensor("op_3730_interleave_0"), val = tensor(false)]; tensor var_3730_cast_fp16 = concat(axis = var_3180, interleave = var_3730_interleave_0, values = (var_3654_cast_fp16, var_3656_cast_fp16, var_3658_cast_fp16, var_3660_cast_fp16, var_3662_cast_fp16, var_3664_cast_fp16))[name = tensor("op_3730_cast_fp16")]; tensor var_3732_interleave_0 = const()[name = tensor("op_3732_interleave_0"), val = tensor(false)]; tensor var_3732_cast_fp16 = concat(axis = var_3180, interleave = var_3732_interleave_0, values = (var_3666_cast_fp16, var_3668_cast_fp16, var_3670_cast_fp16, var_3672_cast_fp16, var_3674_cast_fp16, var_3676_cast_fp16))[name = tensor("op_3732_cast_fp16")]; tensor var_3734_interleave_0 = const()[name = tensor("op_3734_interleave_0"), val = tensor(false)]; tensor var_3734_cast_fp16 = concat(axis = var_3180, interleave = var_3734_interleave_0, values = (var_3678_cast_fp16, var_3680_cast_fp16, var_3682_cast_fp16, var_3684_cast_fp16, var_3686_cast_fp16, var_3688_cast_fp16))[name = tensor("op_3734_cast_fp16")]; tensor var_3736_interleave_0 = const()[name = tensor("op_3736_interleave_0"), val = tensor(false)]; tensor var_3736_cast_fp16 = concat(axis = var_3180, interleave = var_3736_interleave_0, values = (var_3690_cast_fp16, var_3692_cast_fp16, var_3694_cast_fp16, var_3696_cast_fp16, var_3698_cast_fp16, var_3700_cast_fp16))[name = tensor("op_3736_cast_fp16")]; tensor var_3738_interleave_0 = const()[name = tensor("op_3738_interleave_0"), val = tensor(false)]; tensor var_3738_cast_fp16 = concat(axis = var_3180, interleave = var_3738_interleave_0, values = (var_3702_cast_fp16, var_3704_cast_fp16, var_3706_cast_fp16, var_3708_cast_fp16, var_3710_cast_fp16, var_3712_cast_fp16))[name = tensor("op_3738_cast_fp16")]; tensor var_3740_interleave_0 = const()[name = tensor("op_3740_interleave_0"), val = tensor(false)]; tensor var_3740_cast_fp16 = concat(axis = var_3180, interleave = var_3740_interleave_0, values = (var_3714_cast_fp16, var_3716_cast_fp16, var_3718_cast_fp16, var_3720_cast_fp16, var_3722_cast_fp16, var_3724_cast_fp16))[name = tensor("op_3740_cast_fp16")]; tensor input_41_interleave_0 = const()[name = tensor("input_41_interleave_0"), val = tensor(false)]; tensor input_41_cast_fp16 = concat(axis = var_3193, interleave = input_41_interleave_0, values = (var_3726_cast_fp16, var_3728_cast_fp16, var_3730_cast_fp16, var_3732_cast_fp16, var_3734_cast_fp16, var_3736_cast_fp16, var_3738_cast_fp16, var_3740_cast_fp16))[name = tensor("input_41_cast_fp16")]; tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("valid")]; tensor obj_strides_0 = const()[name = tensor("obj_strides_0"), val = tensor([1, 1])]; tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_dilations_0 = const()[name = tensor("obj_dilations_0"), val = tensor([1, 1])]; tensor obj_groups_0 = const()[name = tensor("obj_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36460160)))]; tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36984512)))]; tensor obj_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_cast_fp16")]; tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; tensor var_3759_to_fp16 = const()[name = tensor("op_3759_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_3759_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36985600)))]; tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36986688)))]; tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("valid")]; tensor input_45_strides_0 = const()[name = tensor("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = tensor("input_45_dilations_0"), val = tensor([1, 1])]; tensor input_45_groups_0 = const()[name = tensor("input_45_groups_0"), val = tensor(1)]; tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36987776)))]; tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39084992)))]; tensor input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_45_cast_fp16)[name = tensor("input_cast_fp16")]; tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("valid")]; tensor hidden_states_strides_0 = const()[name = tensor("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = tensor("hidden_states_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_groups_0 = const()[name = tensor("hidden_states_groups_0"), val = tensor(1)]; tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39089152)))]; tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41186368)))]; tensor hidden_states_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = layers_5_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; tensor inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; tensor var_3797_to_fp16 = const()[name = tensor("op_3797_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3797_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41187456)))]; tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41188544)))]; tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; } -> (encoder_output_embeds); }