program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})] { func main(tensor melspectrogram_features) { tensor var_66_pad_type_0 = const()[name = tensor("op_66_pad_type_0"), val = tensor("custom")]; tensor var_66_pad_0 = const()[name = tensor("op_66_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_66_strides_0 = const()[name = tensor("op_66_strides_0"), val = tensor([1, 1])]; tensor var_66_dilations_0 = const()[name = tensor("op_66_dilations_0"), val = tensor([1, 1])]; tensor var_66_groups_0 = const()[name = tensor("op_66_groups_0"), val = tensor(1)]; tensor var_41_to_fp16 = const()[name = tensor("op_41_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor var_47_to_fp16 = const()[name = tensor("op_47_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368768)))]; tensor var_66_cast_fp16 = conv(bias = var_47_to_fp16, dilations = var_66_dilations_0, groups = var_66_groups_0, pad = var_66_pad_0, pad_type = var_66_pad_type_0, strides = var_66_strides_0, weight = var_41_to_fp16, x = melspectrogram_features)[name = tensor("op_66_cast_fp16")]; tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_66_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; tensor var_106_pad_type_0 = const()[name = tensor("op_106_pad_type_0"), val = tensor("custom")]; tensor var_106_pad_0 = const()[name = tensor("op_106_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_106_strides_0 = const()[name = tensor("op_106_strides_0"), val = tensor([2, 2])]; tensor var_106_dilations_0 = const()[name = tensor("op_106_dilations_0"), val = tensor([1, 1])]; tensor var_106_groups_0 = const()[name = tensor("op_106_groups_0"), val = tensor(1)]; tensor var_81_to_fp16 = const()[name = tensor("op_81_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370368)))]; tensor var_87_to_fp16 = const()[name = tensor("op_87_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3909376)))]; tensor var_106_cast_fp16 = conv(bias = var_87_to_fp16, dilations = var_106_dilations_0, groups = var_106_groups_0, pad = var_106_pad_0, pad_type = var_106_pad_type_0, strides = var_106_strides_0, weight = var_81_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_106_cast_fp16")]; tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_106_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; tensor var_124_to_fp16 = const()[name = tensor("op_124_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3910976)))]; tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_124_to_fp16)[name = tensor("inputs_1_cast_fp16")]; tensor var_137 = const()[name = tensor("op_137"), val = tensor(3)]; tensor var_153 = const()[name = tensor("op_153"), val = tensor(1)]; tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; tensor var_170_to_fp16 = const()[name = tensor("op_170_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_170_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6215040)))]; tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6216640)))]; tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6218240)))]; tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6219840)))]; tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("valid")]; tensor query_1_strides_0 = const()[name = tensor("query_1_strides_0"), val = tensor([1, 1])]; tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_1_dilations_0 = const()[name = tensor("query_1_dilations_0"), val = tensor([1, 1])]; tensor query_1_groups_0 = const()[name = tensor("query_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6221440)))]; tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7401152)))]; tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("valid")]; tensor key_1_strides_0 = const()[name = tensor("key_1_strides_0"), val = tensor([1, 1])]; tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_1_dilations_0 = const()[name = tensor("key_1_dilations_0"), val = tensor([1, 1])]; tensor key_1_groups_0 = const()[name = tensor("key_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7402752)))]; tensor key_1_cast_fp16 = conv(dilations = key_1_dilations_0, groups = key_1_groups_0, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("valid")]; tensor value_1_strides_0 = const()[name = tensor("value_1_strides_0"), val = tensor([1, 1])]; tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_1_dilations_0 = const()[name = tensor("value_1_dilations_0"), val = tensor([1, 1])]; tensor value_1_groups_0 = const()[name = tensor("value_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8582464)))]; tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9762176)))]; tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = value_1_dilations_0, groups = value_1_groups_0, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; tensor var_205_begin_0 = const()[name = tensor("op_205_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_205_end_0 = const()[name = tensor("op_205_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_205_end_mask_0 = const()[name = tensor("op_205_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_205_cast_fp16 = slice_by_index(begin = var_205_begin_0, end = var_205_end_0, end_mask = var_205_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_205_cast_fp16")]; tensor var_209_begin_0 = const()[name = tensor("op_209_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_209_end_0 = const()[name = tensor("op_209_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_209_end_mask_0 = const()[name = tensor("op_209_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_209_cast_fp16 = slice_by_index(begin = var_209_begin_0, end = var_209_end_0, end_mask = var_209_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_209_cast_fp16")]; tensor var_213_begin_0 = const()[name = tensor("op_213_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_213_end_0 = const()[name = tensor("op_213_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_213_end_mask_0 = const()[name = tensor("op_213_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_213_cast_fp16 = slice_by_index(begin = var_213_begin_0, end = var_213_end_0, end_mask = var_213_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_213_cast_fp16")]; tensor var_217_begin_0 = const()[name = tensor("op_217_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_217_end_0 = const()[name = tensor("op_217_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_217_end_mask_0 = const()[name = tensor("op_217_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_217_cast_fp16 = slice_by_index(begin = var_217_begin_0, end = var_217_end_0, end_mask = var_217_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_217_cast_fp16")]; tensor var_221_begin_0 = const()[name = tensor("op_221_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_221_end_0 = const()[name = tensor("op_221_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_221_end_mask_0 = const()[name = tensor("op_221_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_221_cast_fp16 = slice_by_index(begin = var_221_begin_0, end = var_221_end_0, end_mask = var_221_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_221_cast_fp16")]; tensor var_225_begin_0 = const()[name = tensor("op_225_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_225_end_0 = const()[name = tensor("op_225_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_225_end_mask_0 = const()[name = tensor("op_225_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_225_cast_fp16 = slice_by_index(begin = var_225_begin_0, end = var_225_end_0, end_mask = var_225_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_225_cast_fp16")]; tensor var_229_begin_0 = const()[name = tensor("op_229_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_229_end_0 = const()[name = tensor("op_229_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_229_end_mask_0 = const()[name = tensor("op_229_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_229_cast_fp16 = slice_by_index(begin = var_229_begin_0, end = var_229_end_0, end_mask = var_229_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_229_cast_fp16")]; tensor var_233_begin_0 = const()[name = tensor("op_233_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_233_end_0 = const()[name = tensor("op_233_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_233_end_mask_0 = const()[name = tensor("op_233_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_233_cast_fp16 = slice_by_index(begin = var_233_begin_0, end = var_233_end_0, end_mask = var_233_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_233_cast_fp16")]; tensor var_237_begin_0 = const()[name = tensor("op_237_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_237_end_0 = const()[name = tensor("op_237_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_237_end_mask_0 = const()[name = tensor("op_237_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_237_cast_fp16 = slice_by_index(begin = var_237_begin_0, end = var_237_end_0, end_mask = var_237_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_237_cast_fp16")]; tensor var_241_begin_0 = const()[name = tensor("op_241_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_241_end_0 = const()[name = tensor("op_241_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_241_end_mask_0 = const()[name = tensor("op_241_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_241_cast_fp16 = slice_by_index(begin = var_241_begin_0, end = var_241_end_0, end_mask = var_241_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_241_cast_fp16")]; tensor var_245_begin_0 = const()[name = tensor("op_245_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_245_end_0 = const()[name = tensor("op_245_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_245_end_mask_0 = const()[name = tensor("op_245_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_245_cast_fp16 = slice_by_index(begin = var_245_begin_0, end = var_245_end_0, end_mask = var_245_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_245_cast_fp16")]; tensor var_249_begin_0 = const()[name = tensor("op_249_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_249_end_0 = const()[name = tensor("op_249_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_249_end_mask_0 = const()[name = tensor("op_249_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_249_cast_fp16 = slice_by_index(begin = var_249_begin_0, end = var_249_end_0, end_mask = var_249_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_249_cast_fp16")]; tensor var_252_begin_0 = const()[name = tensor("op_252_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_252_end_0 = const()[name = tensor("op_252_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_252_end_mask_0 = const()[name = tensor("op_252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_252_cast_fp16 = slice_by_index(begin = var_252_begin_0, end = var_252_end_0, end_mask = var_252_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_252_cast_fp16")]; tensor var_253_begin_0 = const()[name = tensor("op_253_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_253_end_0 = const()[name = tensor("op_253_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_253_end_mask_0 = const()[name = tensor("op_253_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_253_cast_fp16 = slice_by_index(begin = var_253_begin_0, end = var_253_end_0, end_mask = var_253_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_253_cast_fp16")]; tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_254_cast_fp16")]; tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_255_cast_fp16")]; tensor var_256_begin_0 = const()[name = tensor("op_256_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_256_end_0 = const()[name = tensor("op_256_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_256_end_mask_0 = const()[name = tensor("op_256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_256_cast_fp16 = slice_by_index(begin = var_256_begin_0, end = var_256_end_0, end_mask = var_256_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_256_cast_fp16")]; tensor var_257_begin_0 = const()[name = tensor("op_257_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_257_end_0 = const()[name = tensor("op_257_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_257_end_mask_0 = const()[name = tensor("op_257_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_257_cast_fp16 = slice_by_index(begin = var_257_begin_0, end = var_257_end_0, end_mask = var_257_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_257_cast_fp16")]; tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = var_209_cast_fp16)[name = tensor("op_258_cast_fp16")]; tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = var_209_cast_fp16)[name = tensor("op_259_cast_fp16")]; tensor var_260_begin_0 = const()[name = tensor("op_260_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_260_end_0 = const()[name = tensor("op_260_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_260_end_mask_0 = const()[name = tensor("op_260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_260_cast_fp16 = slice_by_index(begin = var_260_begin_0, end = var_260_end_0, end_mask = var_260_end_mask_0, x = var_209_cast_fp16)[name = tensor("op_260_cast_fp16")]; tensor var_261_begin_0 = const()[name = tensor("op_261_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_261_end_0 = const()[name = tensor("op_261_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_261_end_mask_0 = const()[name = tensor("op_261_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_261_cast_fp16 = slice_by_index(begin = var_261_begin_0, end = var_261_end_0, end_mask = var_261_end_mask_0, x = var_209_cast_fp16)[name = tensor("op_261_cast_fp16")]; tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = var_209_cast_fp16)[name = tensor("op_262_cast_fp16")]; tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = var_209_cast_fp16)[name = tensor("op_263_cast_fp16")]; tensor var_264_begin_0 = const()[name = tensor("op_264_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_264_end_0 = const()[name = tensor("op_264_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_264_end_mask_0 = const()[name = tensor("op_264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_264_cast_fp16 = slice_by_index(begin = var_264_begin_0, end = var_264_end_0, end_mask = var_264_end_mask_0, x = var_213_cast_fp16)[name = tensor("op_264_cast_fp16")]; tensor var_265_begin_0 = const()[name = tensor("op_265_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_265_end_0 = const()[name = tensor("op_265_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_265_end_mask_0 = const()[name = tensor("op_265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_265_cast_fp16 = slice_by_index(begin = var_265_begin_0, end = var_265_end_0, end_mask = var_265_end_mask_0, x = var_213_cast_fp16)[name = tensor("op_265_cast_fp16")]; tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = var_213_cast_fp16)[name = tensor("op_266_cast_fp16")]; tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = var_213_cast_fp16)[name = tensor("op_267_cast_fp16")]; tensor var_268_begin_0 = const()[name = tensor("op_268_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_268_end_0 = const()[name = tensor("op_268_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_268_end_mask_0 = const()[name = tensor("op_268_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = var_268_end_0, end_mask = var_268_end_mask_0, x = var_213_cast_fp16)[name = tensor("op_268_cast_fp16")]; tensor var_269_begin_0 = const()[name = tensor("op_269_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_269_end_0 = const()[name = tensor("op_269_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_269_end_mask_0 = const()[name = tensor("op_269_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_269_cast_fp16 = slice_by_index(begin = var_269_begin_0, end = var_269_end_0, end_mask = var_269_end_mask_0, x = var_213_cast_fp16)[name = tensor("op_269_cast_fp16")]; tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = var_217_cast_fp16)[name = tensor("op_270_cast_fp16")]; tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = var_217_cast_fp16)[name = tensor("op_271_cast_fp16")]; tensor var_272_begin_0 = const()[name = tensor("op_272_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_272_end_0 = const()[name = tensor("op_272_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_272_end_mask_0 = const()[name = tensor("op_272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_272_cast_fp16 = slice_by_index(begin = var_272_begin_0, end = var_272_end_0, end_mask = var_272_end_mask_0, x = var_217_cast_fp16)[name = tensor("op_272_cast_fp16")]; tensor var_273_begin_0 = const()[name = tensor("op_273_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_273_end_0 = const()[name = tensor("op_273_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_273_end_mask_0 = const()[name = tensor("op_273_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_273_cast_fp16 = slice_by_index(begin = var_273_begin_0, end = var_273_end_0, end_mask = var_273_end_mask_0, x = var_217_cast_fp16)[name = tensor("op_273_cast_fp16")]; tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = var_217_cast_fp16)[name = tensor("op_274_cast_fp16")]; tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = var_217_cast_fp16)[name = tensor("op_275_cast_fp16")]; tensor var_276_begin_0 = const()[name = tensor("op_276_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_276_end_0 = const()[name = tensor("op_276_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_276_end_mask_0 = const()[name = tensor("op_276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_276_cast_fp16 = slice_by_index(begin = var_276_begin_0, end = var_276_end_0, end_mask = var_276_end_mask_0, x = var_221_cast_fp16)[name = tensor("op_276_cast_fp16")]; tensor var_277_begin_0 = const()[name = tensor("op_277_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_277_end_0 = const()[name = tensor("op_277_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_277_end_mask_0 = const()[name = tensor("op_277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_277_cast_fp16 = slice_by_index(begin = var_277_begin_0, end = var_277_end_0, end_mask = var_277_end_mask_0, x = var_221_cast_fp16)[name = tensor("op_277_cast_fp16")]; tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = var_221_cast_fp16)[name = tensor("op_278_cast_fp16")]; tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = var_221_cast_fp16)[name = tensor("op_279_cast_fp16")]; tensor var_280_begin_0 = const()[name = tensor("op_280_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_280_end_0 = const()[name = tensor("op_280_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_280_end_mask_0 = const()[name = tensor("op_280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_280_cast_fp16 = slice_by_index(begin = var_280_begin_0, end = var_280_end_0, end_mask = var_280_end_mask_0, x = var_221_cast_fp16)[name = tensor("op_280_cast_fp16")]; tensor var_281_begin_0 = const()[name = tensor("op_281_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_281_end_0 = const()[name = tensor("op_281_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_281_end_mask_0 = const()[name = tensor("op_281_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_281_cast_fp16 = slice_by_index(begin = var_281_begin_0, end = var_281_end_0, end_mask = var_281_end_mask_0, x = var_221_cast_fp16)[name = tensor("op_281_cast_fp16")]; tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = var_225_cast_fp16)[name = tensor("op_282_cast_fp16")]; tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = var_225_cast_fp16)[name = tensor("op_283_cast_fp16")]; tensor var_284_begin_0 = const()[name = tensor("op_284_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_284_end_0 = const()[name = tensor("op_284_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_284_end_mask_0 = const()[name = tensor("op_284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_284_cast_fp16 = slice_by_index(begin = var_284_begin_0, end = var_284_end_0, end_mask = var_284_end_mask_0, x = var_225_cast_fp16)[name = tensor("op_284_cast_fp16")]; tensor var_285_begin_0 = const()[name = tensor("op_285_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_285_end_0 = const()[name = tensor("op_285_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_285_end_mask_0 = const()[name = tensor("op_285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = var_225_cast_fp16)[name = tensor("op_285_cast_fp16")]; tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = var_225_cast_fp16)[name = tensor("op_286_cast_fp16")]; tensor var_287_begin_0 = const()[name = tensor("op_287_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_287_end_0 = const()[name = tensor("op_287_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_287_end_mask_0 = const()[name = tensor("op_287_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_287_cast_fp16 = slice_by_index(begin = var_287_begin_0, end = var_287_end_0, end_mask = var_287_end_mask_0, x = var_225_cast_fp16)[name = tensor("op_287_cast_fp16")]; tensor var_288_begin_0 = const()[name = tensor("op_288_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_288_end_0 = const()[name = tensor("op_288_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_288_end_mask_0 = const()[name = tensor("op_288_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_288_cast_fp16 = slice_by_index(begin = var_288_begin_0, end = var_288_end_0, end_mask = var_288_end_mask_0, x = var_229_cast_fp16)[name = tensor("op_288_cast_fp16")]; tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = var_229_cast_fp16)[name = tensor("op_289_cast_fp16")]; tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = var_229_cast_fp16)[name = tensor("op_290_cast_fp16")]; tensor var_291_begin_0 = const()[name = tensor("op_291_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_291_end_0 = const()[name = tensor("op_291_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_291_end_mask_0 = const()[name = tensor("op_291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_291_cast_fp16 = slice_by_index(begin = var_291_begin_0, end = var_291_end_0, end_mask = var_291_end_mask_0, x = var_229_cast_fp16)[name = tensor("op_291_cast_fp16")]; tensor var_292_begin_0 = const()[name = tensor("op_292_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_292_end_0 = const()[name = tensor("op_292_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_292_end_mask_0 = const()[name = tensor("op_292_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_292_cast_fp16 = slice_by_index(begin = var_292_begin_0, end = var_292_end_0, end_mask = var_292_end_mask_0, x = var_229_cast_fp16)[name = tensor("op_292_cast_fp16")]; tensor var_293_begin_0 = const()[name = tensor("op_293_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_293_end_0 = const()[name = tensor("op_293_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_293_end_mask_0 = const()[name = tensor("op_293_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_293_cast_fp16 = slice_by_index(begin = var_293_begin_0, end = var_293_end_0, end_mask = var_293_end_mask_0, x = var_229_cast_fp16)[name = tensor("op_293_cast_fp16")]; tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = var_233_cast_fp16)[name = tensor("op_294_cast_fp16")]; tensor var_295_begin_0 = const()[name = tensor("op_295_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_295_end_0 = const()[name = tensor("op_295_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_295_end_mask_0 = const()[name = tensor("op_295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = var_233_cast_fp16)[name = tensor("op_295_cast_fp16")]; tensor var_296_begin_0 = const()[name = tensor("op_296_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_296_end_0 = const()[name = tensor("op_296_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_296_end_mask_0 = const()[name = tensor("op_296_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_296_cast_fp16 = slice_by_index(begin = var_296_begin_0, end = var_296_end_0, end_mask = var_296_end_mask_0, x = var_233_cast_fp16)[name = tensor("op_296_cast_fp16")]; tensor var_297_begin_0 = const()[name = tensor("op_297_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_297_end_0 = const()[name = tensor("op_297_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_297_end_mask_0 = const()[name = tensor("op_297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_297_cast_fp16 = slice_by_index(begin = var_297_begin_0, end = var_297_end_0, end_mask = var_297_end_mask_0, x = var_233_cast_fp16)[name = tensor("op_297_cast_fp16")]; tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = var_233_cast_fp16)[name = tensor("op_298_cast_fp16")]; tensor var_299_begin_0 = const()[name = tensor("op_299_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_299_end_0 = const()[name = tensor("op_299_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_299_end_mask_0 = const()[name = tensor("op_299_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_299_cast_fp16 = slice_by_index(begin = var_299_begin_0, end = var_299_end_0, end_mask = var_299_end_mask_0, x = var_233_cast_fp16)[name = tensor("op_299_cast_fp16")]; tensor var_300_begin_0 = const()[name = tensor("op_300_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_300_end_0 = const()[name = tensor("op_300_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_300_end_mask_0 = const()[name = tensor("op_300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_300_cast_fp16 = slice_by_index(begin = var_300_begin_0, end = var_300_end_0, end_mask = var_300_end_mask_0, x = var_237_cast_fp16)[name = tensor("op_300_cast_fp16")]; tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = var_237_cast_fp16)[name = tensor("op_301_cast_fp16")]; tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = var_237_cast_fp16)[name = tensor("op_302_cast_fp16")]; tensor var_303_begin_0 = const()[name = tensor("op_303_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_303_end_0 = const()[name = tensor("op_303_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_303_end_mask_0 = const()[name = tensor("op_303_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_303_cast_fp16 = slice_by_index(begin = var_303_begin_0, end = var_303_end_0, end_mask = var_303_end_mask_0, x = var_237_cast_fp16)[name = tensor("op_303_cast_fp16")]; tensor var_304_begin_0 = const()[name = tensor("op_304_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_304_end_0 = const()[name = tensor("op_304_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_304_end_mask_0 = const()[name = tensor("op_304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_304_cast_fp16 = slice_by_index(begin = var_304_begin_0, end = var_304_end_0, end_mask = var_304_end_mask_0, x = var_237_cast_fp16)[name = tensor("op_304_cast_fp16")]; tensor var_305_begin_0 = const()[name = tensor("op_305_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_305_end_0 = const()[name = tensor("op_305_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_305_end_mask_0 = const()[name = tensor("op_305_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_305_cast_fp16 = slice_by_index(begin = var_305_begin_0, end = var_305_end_0, end_mask = var_305_end_mask_0, x = var_237_cast_fp16)[name = tensor("op_305_cast_fp16")]; tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = var_241_cast_fp16)[name = tensor("op_306_cast_fp16")]; tensor var_307_begin_0 = const()[name = tensor("op_307_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_307_end_0 = const()[name = tensor("op_307_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_307_end_mask_0 = const()[name = tensor("op_307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_307_cast_fp16 = slice_by_index(begin = var_307_begin_0, end = var_307_end_0, end_mask = var_307_end_mask_0, x = var_241_cast_fp16)[name = tensor("op_307_cast_fp16")]; tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = var_241_cast_fp16)[name = tensor("op_308_cast_fp16")]; tensor var_309_begin_0 = const()[name = tensor("op_309_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_309_end_0 = const()[name = tensor("op_309_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_309_end_mask_0 = const()[name = tensor("op_309_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_309_cast_fp16 = slice_by_index(begin = var_309_begin_0, end = var_309_end_0, end_mask = var_309_end_mask_0, x = var_241_cast_fp16)[name = tensor("op_309_cast_fp16")]; tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = var_241_cast_fp16)[name = tensor("op_310_cast_fp16")]; tensor var_311_begin_0 = const()[name = tensor("op_311_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_311_end_0 = const()[name = tensor("op_311_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_311_end_mask_0 = const()[name = tensor("op_311_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = var_241_cast_fp16)[name = tensor("op_311_cast_fp16")]; tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = var_245_cast_fp16)[name = tensor("op_312_cast_fp16")]; tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = var_245_cast_fp16)[name = tensor("op_313_cast_fp16")]; tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = var_245_cast_fp16)[name = tensor("op_314_cast_fp16")]; tensor var_315_begin_0 = const()[name = tensor("op_315_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_315_end_0 = const()[name = tensor("op_315_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_315_end_mask_0 = const()[name = tensor("op_315_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_315_cast_fp16 = slice_by_index(begin = var_315_begin_0, end = var_315_end_0, end_mask = var_315_end_mask_0, x = var_245_cast_fp16)[name = tensor("op_315_cast_fp16")]; tensor var_316_begin_0 = const()[name = tensor("op_316_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_316_end_0 = const()[name = tensor("op_316_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_316_end_mask_0 = const()[name = tensor("op_316_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_316_cast_fp16 = slice_by_index(begin = var_316_begin_0, end = var_316_end_0, end_mask = var_316_end_mask_0, x = var_245_cast_fp16)[name = tensor("op_316_cast_fp16")]; tensor var_317_begin_0 = const()[name = tensor("op_317_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_317_end_0 = const()[name = tensor("op_317_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_317_end_mask_0 = const()[name = tensor("op_317_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_317_cast_fp16 = slice_by_index(begin = var_317_begin_0, end = var_317_end_0, end_mask = var_317_end_mask_0, x = var_245_cast_fp16)[name = tensor("op_317_cast_fp16")]; tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = var_249_cast_fp16)[name = tensor("op_318_cast_fp16")]; tensor var_319_begin_0 = const()[name = tensor("op_319_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_319_end_0 = const()[name = tensor("op_319_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_319_end_mask_0 = const()[name = tensor("op_319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_319_cast_fp16 = slice_by_index(begin = var_319_begin_0, end = var_319_end_0, end_mask = var_319_end_mask_0, x = var_249_cast_fp16)[name = tensor("op_319_cast_fp16")]; tensor var_320_begin_0 = const()[name = tensor("op_320_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_320_end_0 = const()[name = tensor("op_320_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_320_end_mask_0 = const()[name = tensor("op_320_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_320_cast_fp16 = slice_by_index(begin = var_320_begin_0, end = var_320_end_0, end_mask = var_320_end_mask_0, x = var_249_cast_fp16)[name = tensor("op_320_cast_fp16")]; tensor var_321_begin_0 = const()[name = tensor("op_321_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_321_end_0 = const()[name = tensor("op_321_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_321_end_mask_0 = const()[name = tensor("op_321_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_321_cast_fp16 = slice_by_index(begin = var_321_begin_0, end = var_321_end_0, end_mask = var_321_end_mask_0, x = var_249_cast_fp16)[name = tensor("op_321_cast_fp16")]; tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = var_249_cast_fp16)[name = tensor("op_322_cast_fp16")]; tensor var_323_begin_0 = const()[name = tensor("op_323_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_323_end_0 = const()[name = tensor("op_323_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_323_end_mask_0 = const()[name = tensor("op_323_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_323_cast_fp16 = slice_by_index(begin = var_323_begin_0, end = var_323_end_0, end_mask = var_323_end_mask_0, x = var_249_cast_fp16)[name = tensor("op_323_cast_fp16")]; tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_328_begin_0 = const()[name = tensor("op_328_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_328_end_0 = const()[name = tensor("op_328_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_328_end_mask_0 = const()[name = tensor("op_328_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_11")]; tensor var_328_cast_fp16 = slice_by_index(begin = var_328_begin_0, end = var_328_end_0, end_mask = var_328_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_328_cast_fp16")]; tensor var_332_begin_0 = const()[name = tensor("op_332_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_332_end_0 = const()[name = tensor("op_332_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_332_end_mask_0 = const()[name = tensor("op_332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_332_cast_fp16 = slice_by_index(begin = var_332_begin_0, end = var_332_end_0, end_mask = var_332_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_332_cast_fp16")]; tensor var_336_begin_0 = const()[name = tensor("op_336_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_336_end_0 = const()[name = tensor("op_336_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_336_end_mask_0 = const()[name = tensor("op_336_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = var_336_end_0, end_mask = var_336_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_336_cast_fp16")]; tensor var_340_begin_0 = const()[name = tensor("op_340_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_340_end_0 = const()[name = tensor("op_340_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_340_end_mask_0 = const()[name = tensor("op_340_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_340_cast_fp16 = slice_by_index(begin = var_340_begin_0, end = var_340_end_0, end_mask = var_340_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_340_cast_fp16")]; tensor var_344_begin_0 = const()[name = tensor("op_344_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_344_end_0 = const()[name = tensor("op_344_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_344_end_mask_0 = const()[name = tensor("op_344_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_344_cast_fp16")]; tensor var_348_begin_0 = const()[name = tensor("op_348_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_348_end_0 = const()[name = tensor("op_348_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_348_end_mask_0 = const()[name = tensor("op_348_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_348_cast_fp16 = slice_by_index(begin = var_348_begin_0, end = var_348_end_0, end_mask = var_348_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_348_cast_fp16")]; tensor var_352_begin_0 = const()[name = tensor("op_352_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_352_end_0 = const()[name = tensor("op_352_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_352_end_mask_0 = const()[name = tensor("op_352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_352_cast_fp16 = slice_by_index(begin = var_352_begin_0, end = var_352_end_0, end_mask = var_352_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_352_cast_fp16")]; tensor var_356_begin_0 = const()[name = tensor("op_356_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_356_end_0 = const()[name = tensor("op_356_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_356_end_mask_0 = const()[name = tensor("op_356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_356_cast_fp16 = slice_by_index(begin = var_356_begin_0, end = var_356_end_0, end_mask = var_356_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_356_cast_fp16")]; tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_360_cast_fp16")]; tensor var_364_begin_0 = const()[name = tensor("op_364_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_364_end_0 = const()[name = tensor("op_364_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_364_end_mask_0 = const()[name = tensor("op_364_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_364_cast_fp16 = slice_by_index(begin = var_364_begin_0, end = var_364_end_0, end_mask = var_364_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_364_cast_fp16")]; tensor var_368_begin_0 = const()[name = tensor("op_368_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_368_end_0 = const()[name = tensor("op_368_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_368_end_mask_0 = const()[name = tensor("op_368_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_368_cast_fp16 = slice_by_index(begin = var_368_begin_0, end = var_368_end_0, end_mask = var_368_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_368_cast_fp16")]; tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_372_cast_fp16")]; tensor var_374_begin_0 = const()[name = tensor("op_374_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_374_end_0 = const()[name = tensor("op_374_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_374_end_mask_0 = const()[name = tensor("op_374_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_374_cast_fp16")]; tensor var_378_begin_0 = const()[name = tensor("op_378_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_378_end_0 = const()[name = tensor("op_378_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_378_end_mask_0 = const()[name = tensor("op_378_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_378_cast_fp16 = slice_by_index(begin = var_378_begin_0, end = var_378_end_0, end_mask = var_378_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_378_cast_fp16")]; tensor var_382_begin_0 = const()[name = tensor("op_382_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_382_end_0 = const()[name = tensor("op_382_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_382_end_mask_0 = const()[name = tensor("op_382_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_382_cast_fp16 = slice_by_index(begin = var_382_begin_0, end = var_382_end_0, end_mask = var_382_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_382_cast_fp16")]; tensor var_386_begin_0 = const()[name = tensor("op_386_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_386_end_0 = const()[name = tensor("op_386_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_386_end_mask_0 = const()[name = tensor("op_386_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_386_cast_fp16 = slice_by_index(begin = var_386_begin_0, end = var_386_end_0, end_mask = var_386_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_386_cast_fp16")]; tensor var_390_begin_0 = const()[name = tensor("op_390_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_390_end_0 = const()[name = tensor("op_390_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_390_end_mask_0 = const()[name = tensor("op_390_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_390_cast_fp16 = slice_by_index(begin = var_390_begin_0, end = var_390_end_0, end_mask = var_390_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_390_cast_fp16")]; tensor var_394_begin_0 = const()[name = tensor("op_394_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_394_end_0 = const()[name = tensor("op_394_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_394_end_mask_0 = const()[name = tensor("op_394_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_394_cast_fp16 = slice_by_index(begin = var_394_begin_0, end = var_394_end_0, end_mask = var_394_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_394_cast_fp16")]; tensor var_398_begin_0 = const()[name = tensor("op_398_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_398_end_0 = const()[name = tensor("op_398_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_398_end_mask_0 = const()[name = tensor("op_398_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_398_cast_fp16 = slice_by_index(begin = var_398_begin_0, end = var_398_end_0, end_mask = var_398_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_398_cast_fp16")]; tensor var_402_begin_0 = const()[name = tensor("op_402_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_402_end_0 = const()[name = tensor("op_402_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_402_end_mask_0 = const()[name = tensor("op_402_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_402_cast_fp16 = slice_by_index(begin = var_402_begin_0, end = var_402_end_0, end_mask = var_402_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_402_cast_fp16")]; tensor var_406_begin_0 = const()[name = tensor("op_406_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_406_end_0 = const()[name = tensor("op_406_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_406_end_mask_0 = const()[name = tensor("op_406_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_406_cast_fp16 = slice_by_index(begin = var_406_begin_0, end = var_406_end_0, end_mask = var_406_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_406_cast_fp16")]; tensor var_410_begin_0 = const()[name = tensor("op_410_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_410_end_0 = const()[name = tensor("op_410_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_410_end_mask_0 = const()[name = tensor("op_410_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_410_cast_fp16 = slice_by_index(begin = var_410_begin_0, end = var_410_end_0, end_mask = var_410_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_410_cast_fp16")]; tensor var_414_begin_0 = const()[name = tensor("op_414_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_414_end_0 = const()[name = tensor("op_414_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_414_end_mask_0 = const()[name = tensor("op_414_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_414_cast_fp16 = slice_by_index(begin = var_414_begin_0, end = var_414_end_0, end_mask = var_414_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_414_cast_fp16")]; tensor var_418_begin_0 = const()[name = tensor("op_418_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_418_end_0 = const()[name = tensor("op_418_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_418_end_mask_0 = const()[name = tensor("op_418_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_418_cast_fp16 = slice_by_index(begin = var_418_begin_0, end = var_418_end_0, end_mask = var_418_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_418_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1_equation_0, values = (var_328_cast_fp16, var_252_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3_equation_0, values = (var_328_cast_fp16, var_253_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5_equation_0, values = (var_328_cast_fp16, var_254_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5_cast_fp16")]; tensor _SplitHeadsQ__mh_w_7_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_7_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_7_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_7_equation_0, values = (var_328_cast_fp16, var_255_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_7_cast_fp16")]; tensor _SplitHeadsQ__mh_w_9_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_9_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_9_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_9_equation_0, values = (var_328_cast_fp16, var_256_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_9_cast_fp16")]; tensor _SplitHeadsQ__mh_w_11_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_11_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_11_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_11_equation_0, values = (var_328_cast_fp16, var_257_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_11_cast_fp16")]; tensor _SplitHeadsQ__mh_w_13_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_13_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_13_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_13_equation_0, values = (var_332_cast_fp16, var_258_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_13_cast_fp16")]; tensor _SplitHeadsQ__mh_w_15_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_15_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_15_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_15_equation_0, values = (var_332_cast_fp16, var_259_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_15_cast_fp16")]; tensor _SplitHeadsQ__mh_w_17_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_17_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_17_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_17_equation_0, values = (var_332_cast_fp16, var_260_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_17_cast_fp16")]; tensor _SplitHeadsQ__mh_w_19_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_19_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_19_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_19_equation_0, values = (var_332_cast_fp16, var_261_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_19_cast_fp16")]; tensor _SplitHeadsQ__mh_w_21_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_21_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_21_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_21_equation_0, values = (var_332_cast_fp16, var_262_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_21_cast_fp16")]; tensor _SplitHeadsQ__mh_w_23_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_23_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_23_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_23_equation_0, values = (var_332_cast_fp16, var_263_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_23_cast_fp16")]; tensor _SplitHeadsQ__mh_w_25_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_25_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_25_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_25_equation_0, values = (var_336_cast_fp16, var_264_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_25_cast_fp16")]; tensor _SplitHeadsQ__mh_w_27_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_27_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_27_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_27_equation_0, values = (var_336_cast_fp16, var_265_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_27_cast_fp16")]; tensor _SplitHeadsQ__mh_w_29_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_29_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_29_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_29_equation_0, values = (var_336_cast_fp16, var_266_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_29_cast_fp16")]; tensor _SplitHeadsQ__mh_w_31_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_31_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_31_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_31_equation_0, values = (var_336_cast_fp16, var_267_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_31_cast_fp16")]; tensor _SplitHeadsQ__mh_w_33_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_33_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_33_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_33_equation_0, values = (var_336_cast_fp16, var_268_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_33_cast_fp16")]; tensor _SplitHeadsQ__mh_w_35_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_35_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_35_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_35_equation_0, values = (var_336_cast_fp16, var_269_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_35_cast_fp16")]; tensor _SplitHeadsQ__mh_w_37_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_37_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_37_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_37_equation_0, values = (var_340_cast_fp16, var_270_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_37_cast_fp16")]; tensor _SplitHeadsQ__mh_w_39_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_39_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_39_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_39_equation_0, values = (var_340_cast_fp16, var_271_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_39_cast_fp16")]; tensor _SplitHeadsQ__mh_w_41_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_41_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_41_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_41_equation_0, values = (var_340_cast_fp16, var_272_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_41_cast_fp16")]; tensor _SplitHeadsQ__mh_w_43_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_43_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_43_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_43_equation_0, values = (var_340_cast_fp16, var_273_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_43_cast_fp16")]; tensor _SplitHeadsQ__mh_w_45_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_45_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_45_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_45_equation_0, values = (var_340_cast_fp16, var_274_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_45_cast_fp16")]; tensor _SplitHeadsQ__mh_w_47_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_47_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_47_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_47_equation_0, values = (var_340_cast_fp16, var_275_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_47_cast_fp16")]; tensor _SplitHeadsQ__mh_w_49_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_49_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_49_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_49_equation_0, values = (var_344_cast_fp16, var_276_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_49_cast_fp16")]; tensor _SplitHeadsQ__mh_w_51_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_51_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_51_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_51_equation_0, values = (var_344_cast_fp16, var_277_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_51_cast_fp16")]; tensor _SplitHeadsQ__mh_w_53_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_53_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_53_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_53_equation_0, values = (var_344_cast_fp16, var_278_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_53_cast_fp16")]; tensor _SplitHeadsQ__mh_w_55_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_55_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_55_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_55_equation_0, values = (var_344_cast_fp16, var_279_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_55_cast_fp16")]; tensor _SplitHeadsQ__mh_w_57_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_57_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_57_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_57_equation_0, values = (var_344_cast_fp16, var_280_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_57_cast_fp16")]; tensor _SplitHeadsQ__mh_w_59_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_59_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_59_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_59_equation_0, values = (var_344_cast_fp16, var_281_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_59_cast_fp16")]; tensor _SplitHeadsQ__mh_w_61_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_61_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_61_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_61_equation_0, values = (var_348_cast_fp16, var_282_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_61_cast_fp16")]; tensor _SplitHeadsQ__mh_w_63_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_63_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_63_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_63_equation_0, values = (var_348_cast_fp16, var_283_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_63_cast_fp16")]; tensor _SplitHeadsQ__mh_w_65_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_65_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_65_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_65_equation_0, values = (var_348_cast_fp16, var_284_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_65_cast_fp16")]; tensor _SplitHeadsQ__mh_w_67_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_67_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_67_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_67_equation_0, values = (var_348_cast_fp16, var_285_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_67_cast_fp16")]; tensor _SplitHeadsQ__mh_w_69_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_69_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_69_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_69_equation_0, values = (var_348_cast_fp16, var_286_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_69_cast_fp16")]; tensor _SplitHeadsQ__mh_w_71_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_71_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_71_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_71_equation_0, values = (var_348_cast_fp16, var_287_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_71_cast_fp16")]; tensor _SplitHeadsQ__mh_w_73_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_73_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_73_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_73_equation_0, values = (var_352_cast_fp16, var_288_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_73_cast_fp16")]; tensor _SplitHeadsQ__mh_w_75_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_75_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_75_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_75_equation_0, values = (var_352_cast_fp16, var_289_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_75_cast_fp16")]; tensor _SplitHeadsQ__mh_w_77_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_77_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_77_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_77_equation_0, values = (var_352_cast_fp16, var_290_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_77_cast_fp16")]; tensor _SplitHeadsQ__mh_w_79_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_79_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_79_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_79_equation_0, values = (var_352_cast_fp16, var_291_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_79_cast_fp16")]; tensor _SplitHeadsQ__mh_w_81_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_81_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_81_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_81_equation_0, values = (var_352_cast_fp16, var_292_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_81_cast_fp16")]; tensor _SplitHeadsQ__mh_w_83_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_83_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_83_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_83_equation_0, values = (var_352_cast_fp16, var_293_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_83_cast_fp16")]; tensor _SplitHeadsQ__mh_w_85_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_85_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_85_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_85_equation_0, values = (var_356_cast_fp16, var_294_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_85_cast_fp16")]; tensor _SplitHeadsQ__mh_w_87_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_87_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_87_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_87_equation_0, values = (var_356_cast_fp16, var_295_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_87_cast_fp16")]; tensor _SplitHeadsQ__mh_w_89_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_89_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_89_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_89_equation_0, values = (var_356_cast_fp16, var_296_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_89_cast_fp16")]; tensor _SplitHeadsQ__mh_w_91_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_91_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_91_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_91_equation_0, values = (var_356_cast_fp16, var_297_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_91_cast_fp16")]; tensor _SplitHeadsQ__mh_w_93_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_93_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_93_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_93_equation_0, values = (var_356_cast_fp16, var_298_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_93_cast_fp16")]; tensor _SplitHeadsQ__mh_w_95_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_95_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_95_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_95_equation_0, values = (var_356_cast_fp16, var_299_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_95_cast_fp16")]; tensor _SplitHeadsQ__mh_w_97_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_97_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_97_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_97_equation_0, values = (var_360_cast_fp16, var_300_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_97_cast_fp16")]; tensor _SplitHeadsQ__mh_w_99_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_99_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_99_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_99_equation_0, values = (var_360_cast_fp16, var_301_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_99_cast_fp16")]; tensor _SplitHeadsQ__mh_w_101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_101_equation_0, values = (var_360_cast_fp16, var_302_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_103_equation_0, values = (var_360_cast_fp16, var_303_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_105_equation_0, values = (var_360_cast_fp16, var_304_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_107_equation_0, values = (var_360_cast_fp16, var_305_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_109_equation_0, values = (var_364_cast_fp16, var_306_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_111_equation_0, values = (var_364_cast_fp16, var_307_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_113_equation_0, values = (var_364_cast_fp16, var_308_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_115_equation_0, values = (var_364_cast_fp16, var_309_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_117_equation_0, values = (var_364_cast_fp16, var_310_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_119_equation_0, values = (var_364_cast_fp16, var_311_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_121_equation_0, values = (var_368_cast_fp16, var_312_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_123_equation_0, values = (var_368_cast_fp16, var_313_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_125_equation_0, values = (var_368_cast_fp16, var_314_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_127_equation_0, values = (var_368_cast_fp16, var_315_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_129_equation_0, values = (var_368_cast_fp16, var_316_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_131_equation_0, values = (var_368_cast_fp16, var_317_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_133_equation_0, values = (var_372_cast_fp16, var_318_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_135_equation_0, values = (var_372_cast_fp16, var_319_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_137_equation_0, values = (var_372_cast_fp16, var_320_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_139_equation_0, values = (var_372_cast_fp16, var_321_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_141_equation_0, values = (var_372_cast_fp16, var_322_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_143_equation_0, values = (var_372_cast_fp16, var_323_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_143_cast_fp16")]; tensor var_565_to_fp16 = const()[name = tensor("op_565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1_cast_fp16, y = var_565_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; tensor var_567_to_fp16 = const()[name = tensor("op_567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3_cast_fp16, y = var_567_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; tensor var_569_to_fp16 = const()[name = tensor("op_569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5_cast_fp16, y = var_569_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; tensor var_571_to_fp16 = const()[name = tensor("op_571_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_7_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_7_cast_fp16, y = var_571_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; tensor var_573_to_fp16 = const()[name = tensor("op_573_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_9_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_9_cast_fp16, y = var_573_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; tensor var_575_to_fp16 = const()[name = tensor("op_575_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_11_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_11_cast_fp16, y = var_575_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; tensor var_577_to_fp16 = const()[name = tensor("op_577_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_13_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_13_cast_fp16, y = var_577_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; tensor var_579_to_fp16 = const()[name = tensor("op_579_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_15_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_15_cast_fp16, y = var_579_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; tensor var_581_to_fp16 = const()[name = tensor("op_581_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_17_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_17_cast_fp16, y = var_581_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; tensor var_583_to_fp16 = const()[name = tensor("op_583_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_19_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_19_cast_fp16, y = var_583_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; tensor var_585_to_fp16 = const()[name = tensor("op_585_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_21_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_21_cast_fp16, y = var_585_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; tensor var_587_to_fp16 = const()[name = tensor("op_587_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_23_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_23_cast_fp16, y = var_587_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; tensor var_589_to_fp16 = const()[name = tensor("op_589_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_25_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_25_cast_fp16, y = var_589_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; tensor var_591_to_fp16 = const()[name = tensor("op_591_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_27_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_27_cast_fp16, y = var_591_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; tensor var_593_to_fp16 = const()[name = tensor("op_593_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_29_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_29_cast_fp16, y = var_593_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; tensor var_595_to_fp16 = const()[name = tensor("op_595_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_31_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_31_cast_fp16, y = var_595_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; tensor var_597_to_fp16 = const()[name = tensor("op_597_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_33_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_33_cast_fp16, y = var_597_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; tensor var_599_to_fp16 = const()[name = tensor("op_599_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_35_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_35_cast_fp16, y = var_599_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; tensor var_601_to_fp16 = const()[name = tensor("op_601_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_37_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_37_cast_fp16, y = var_601_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_39_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_39_cast_fp16, y = var_603_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; tensor var_605_to_fp16 = const()[name = tensor("op_605_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_41_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_41_cast_fp16, y = var_605_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_43_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_43_cast_fp16, y = var_607_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; tensor var_609_to_fp16 = const()[name = tensor("op_609_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_45_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_45_cast_fp16, y = var_609_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_47_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_47_cast_fp16, y = var_611_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; tensor var_613_to_fp16 = const()[name = tensor("op_613_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_49_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_49_cast_fp16, y = var_613_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; tensor var_615_to_fp16 = const()[name = tensor("op_615_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_51_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_51_cast_fp16, y = var_615_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; tensor var_617_to_fp16 = const()[name = tensor("op_617_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_53_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_53_cast_fp16, y = var_617_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; tensor var_619_to_fp16 = const()[name = tensor("op_619_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_55_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_55_cast_fp16, y = var_619_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; tensor var_621_to_fp16 = const()[name = tensor("op_621_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_57_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_57_cast_fp16, y = var_621_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_59_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_59_cast_fp16, y = var_623_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; tensor var_625_to_fp16 = const()[name = tensor("op_625_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_61_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_61_cast_fp16, y = var_625_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_63_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_63_cast_fp16, y = var_627_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; tensor var_629_to_fp16 = const()[name = tensor("op_629_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_65_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_65_cast_fp16, y = var_629_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; tensor var_631_to_fp16 = const()[name = tensor("op_631_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_67_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_67_cast_fp16, y = var_631_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; tensor var_633_to_fp16 = const()[name = tensor("op_633_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_69_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_69_cast_fp16, y = var_633_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; tensor var_635_to_fp16 = const()[name = tensor("op_635_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_71_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_71_cast_fp16, y = var_635_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; tensor var_637_to_fp16 = const()[name = tensor("op_637_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_73_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_73_cast_fp16, y = var_637_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; tensor var_639_to_fp16 = const()[name = tensor("op_639_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_75_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_75_cast_fp16, y = var_639_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; tensor var_641_to_fp16 = const()[name = tensor("op_641_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_77_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_77_cast_fp16, y = var_641_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; tensor var_643_to_fp16 = const()[name = tensor("op_643_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_79_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_79_cast_fp16, y = var_643_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; tensor var_645_to_fp16 = const()[name = tensor("op_645_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_81_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_81_cast_fp16, y = var_645_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; tensor var_647_to_fp16 = const()[name = tensor("op_647_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_83_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_83_cast_fp16, y = var_647_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; tensor var_649_to_fp16 = const()[name = tensor("op_649_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_85_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_85_cast_fp16, y = var_649_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; tensor var_651_to_fp16 = const()[name = tensor("op_651_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_87_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_87_cast_fp16, y = var_651_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; tensor var_653_to_fp16 = const()[name = tensor("op_653_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_89_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_89_cast_fp16, y = var_653_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; tensor var_655_to_fp16 = const()[name = tensor("op_655_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_91_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_91_cast_fp16, y = var_655_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; tensor var_657_to_fp16 = const()[name = tensor("op_657_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_93_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_93_cast_fp16, y = var_657_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; tensor var_659_to_fp16 = const()[name = tensor("op_659_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_95_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_95_cast_fp16, y = var_659_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; tensor var_661_to_fp16 = const()[name = tensor("op_661_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_97_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_97_cast_fp16, y = var_661_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; tensor var_663_to_fp16 = const()[name = tensor("op_663_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_99_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_99_cast_fp16, y = var_663_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; tensor var_665_to_fp16 = const()[name = tensor("op_665_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_101_cast_fp16, y = var_665_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_103_cast_fp16, y = var_667_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; tensor var_669_to_fp16 = const()[name = tensor("op_669_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_105_cast_fp16, y = var_669_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_107_cast_fp16, y = var_671_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; tensor var_673_to_fp16 = const()[name = tensor("op_673_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_109_cast_fp16, y = var_673_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; tensor var_675_to_fp16 = const()[name = tensor("op_675_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_111_cast_fp16, y = var_675_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; tensor var_677_to_fp16 = const()[name = tensor("op_677_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_113_cast_fp16, y = var_677_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_115_cast_fp16, y = var_679_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; tensor var_681_to_fp16 = const()[name = tensor("op_681_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_117_cast_fp16, y = var_681_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_119_cast_fp16, y = var_683_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; tensor var_685_to_fp16 = const()[name = tensor("op_685_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_121_cast_fp16, y = var_685_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_123_cast_fp16, y = var_687_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; tensor var_689_to_fp16 = const()[name = tensor("op_689_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_125_cast_fp16, y = var_689_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_127_cast_fp16, y = var_691_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; tensor var_693_to_fp16 = const()[name = tensor("op_693_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_129_cast_fp16, y = var_693_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_131_cast_fp16, y = var_695_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; tensor var_697_to_fp16 = const()[name = tensor("op_697_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_133_cast_fp16, y = var_697_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_135_cast_fp16, y = var_699_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; tensor var_701_to_fp16 = const()[name = tensor("op_701_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_137_cast_fp16, y = var_701_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_139_cast_fp16, y = var_703_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_141_cast_fp16, y = var_705_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_143_cast_fp16, y = var_707_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; tensor var_709_cast_fp16 = softmax(axis = var_153, x = aw_chunk_1_cast_fp16)[name = tensor("op_709_cast_fp16")]; tensor var_710_cast_fp16 = softmax(axis = var_153, x = aw_chunk_3_cast_fp16)[name = tensor("op_710_cast_fp16")]; tensor var_711_cast_fp16 = softmax(axis = var_153, x = aw_chunk_5_cast_fp16)[name = tensor("op_711_cast_fp16")]; tensor var_712_cast_fp16 = softmax(axis = var_153, x = aw_chunk_7_cast_fp16)[name = tensor("op_712_cast_fp16")]; tensor var_713_cast_fp16 = softmax(axis = var_153, x = aw_chunk_9_cast_fp16)[name = tensor("op_713_cast_fp16")]; tensor var_714_cast_fp16 = softmax(axis = var_153, x = aw_chunk_11_cast_fp16)[name = tensor("op_714_cast_fp16")]; tensor var_715_cast_fp16 = softmax(axis = var_153, x = aw_chunk_13_cast_fp16)[name = tensor("op_715_cast_fp16")]; tensor var_716_cast_fp16 = softmax(axis = var_153, x = aw_chunk_15_cast_fp16)[name = tensor("op_716_cast_fp16")]; tensor var_717_cast_fp16 = softmax(axis = var_153, x = aw_chunk_17_cast_fp16)[name = tensor("op_717_cast_fp16")]; tensor var_718_cast_fp16 = softmax(axis = var_153, x = aw_chunk_19_cast_fp16)[name = tensor("op_718_cast_fp16")]; tensor var_719_cast_fp16 = softmax(axis = var_153, x = aw_chunk_21_cast_fp16)[name = tensor("op_719_cast_fp16")]; tensor var_720_cast_fp16 = softmax(axis = var_153, x = aw_chunk_23_cast_fp16)[name = tensor("op_720_cast_fp16")]; tensor var_721_cast_fp16 = softmax(axis = var_153, x = aw_chunk_25_cast_fp16)[name = tensor("op_721_cast_fp16")]; tensor var_722_cast_fp16 = softmax(axis = var_153, x = aw_chunk_27_cast_fp16)[name = tensor("op_722_cast_fp16")]; tensor var_723_cast_fp16 = softmax(axis = var_153, x = aw_chunk_29_cast_fp16)[name = tensor("op_723_cast_fp16")]; tensor var_724_cast_fp16 = softmax(axis = var_153, x = aw_chunk_31_cast_fp16)[name = tensor("op_724_cast_fp16")]; tensor var_725_cast_fp16 = softmax(axis = var_153, x = aw_chunk_33_cast_fp16)[name = tensor("op_725_cast_fp16")]; tensor var_726_cast_fp16 = softmax(axis = var_153, x = aw_chunk_35_cast_fp16)[name = tensor("op_726_cast_fp16")]; tensor var_727_cast_fp16 = softmax(axis = var_153, x = aw_chunk_37_cast_fp16)[name = tensor("op_727_cast_fp16")]; tensor var_728_cast_fp16 = softmax(axis = var_153, x = aw_chunk_39_cast_fp16)[name = tensor("op_728_cast_fp16")]; tensor var_729_cast_fp16 = softmax(axis = var_153, x = aw_chunk_41_cast_fp16)[name = tensor("op_729_cast_fp16")]; tensor var_730_cast_fp16 = softmax(axis = var_153, x = aw_chunk_43_cast_fp16)[name = tensor("op_730_cast_fp16")]; tensor var_731_cast_fp16 = softmax(axis = var_153, x = aw_chunk_45_cast_fp16)[name = tensor("op_731_cast_fp16")]; tensor var_732_cast_fp16 = softmax(axis = var_153, x = aw_chunk_47_cast_fp16)[name = tensor("op_732_cast_fp16")]; tensor var_733_cast_fp16 = softmax(axis = var_153, x = aw_chunk_49_cast_fp16)[name = tensor("op_733_cast_fp16")]; tensor var_734_cast_fp16 = softmax(axis = var_153, x = aw_chunk_51_cast_fp16)[name = tensor("op_734_cast_fp16")]; tensor var_735_cast_fp16 = softmax(axis = var_153, x = aw_chunk_53_cast_fp16)[name = tensor("op_735_cast_fp16")]; tensor var_736_cast_fp16 = softmax(axis = var_153, x = aw_chunk_55_cast_fp16)[name = tensor("op_736_cast_fp16")]; tensor var_737_cast_fp16 = softmax(axis = var_153, x = aw_chunk_57_cast_fp16)[name = tensor("op_737_cast_fp16")]; tensor var_738_cast_fp16 = softmax(axis = var_153, x = aw_chunk_59_cast_fp16)[name = tensor("op_738_cast_fp16")]; tensor var_739_cast_fp16 = softmax(axis = var_153, x = aw_chunk_61_cast_fp16)[name = tensor("op_739_cast_fp16")]; tensor var_740_cast_fp16 = softmax(axis = var_153, x = aw_chunk_63_cast_fp16)[name = tensor("op_740_cast_fp16")]; tensor var_741_cast_fp16 = softmax(axis = var_153, x = aw_chunk_65_cast_fp16)[name = tensor("op_741_cast_fp16")]; tensor var_742_cast_fp16 = softmax(axis = var_153, x = aw_chunk_67_cast_fp16)[name = tensor("op_742_cast_fp16")]; tensor var_743_cast_fp16 = softmax(axis = var_153, x = aw_chunk_69_cast_fp16)[name = tensor("op_743_cast_fp16")]; tensor var_744_cast_fp16 = softmax(axis = var_153, x = aw_chunk_71_cast_fp16)[name = tensor("op_744_cast_fp16")]; tensor var_745_cast_fp16 = softmax(axis = var_153, x = aw_chunk_73_cast_fp16)[name = tensor("op_745_cast_fp16")]; tensor var_746_cast_fp16 = softmax(axis = var_153, x = aw_chunk_75_cast_fp16)[name = tensor("op_746_cast_fp16")]; tensor var_747_cast_fp16 = softmax(axis = var_153, x = aw_chunk_77_cast_fp16)[name = tensor("op_747_cast_fp16")]; tensor var_748_cast_fp16 = softmax(axis = var_153, x = aw_chunk_79_cast_fp16)[name = tensor("op_748_cast_fp16")]; tensor var_749_cast_fp16 = softmax(axis = var_153, x = aw_chunk_81_cast_fp16)[name = tensor("op_749_cast_fp16")]; tensor var_750_cast_fp16 = softmax(axis = var_153, x = aw_chunk_83_cast_fp16)[name = tensor("op_750_cast_fp16")]; tensor var_751_cast_fp16 = softmax(axis = var_153, x = aw_chunk_85_cast_fp16)[name = tensor("op_751_cast_fp16")]; tensor var_752_cast_fp16 = softmax(axis = var_153, x = aw_chunk_87_cast_fp16)[name = tensor("op_752_cast_fp16")]; tensor var_753_cast_fp16 = softmax(axis = var_153, x = aw_chunk_89_cast_fp16)[name = tensor("op_753_cast_fp16")]; tensor var_754_cast_fp16 = softmax(axis = var_153, x = aw_chunk_91_cast_fp16)[name = tensor("op_754_cast_fp16")]; tensor var_755_cast_fp16 = softmax(axis = var_153, x = aw_chunk_93_cast_fp16)[name = tensor("op_755_cast_fp16")]; tensor var_756_cast_fp16 = softmax(axis = var_153, x = aw_chunk_95_cast_fp16)[name = tensor("op_756_cast_fp16")]; tensor var_757_cast_fp16 = softmax(axis = var_153, x = aw_chunk_97_cast_fp16)[name = tensor("op_757_cast_fp16")]; tensor var_758_cast_fp16 = softmax(axis = var_153, x = aw_chunk_99_cast_fp16)[name = tensor("op_758_cast_fp16")]; tensor var_759_cast_fp16 = softmax(axis = var_153, x = aw_chunk_101_cast_fp16)[name = tensor("op_759_cast_fp16")]; tensor var_760_cast_fp16 = softmax(axis = var_153, x = aw_chunk_103_cast_fp16)[name = tensor("op_760_cast_fp16")]; tensor var_761_cast_fp16 = softmax(axis = var_153, x = aw_chunk_105_cast_fp16)[name = tensor("op_761_cast_fp16")]; tensor var_762_cast_fp16 = softmax(axis = var_153, x = aw_chunk_107_cast_fp16)[name = tensor("op_762_cast_fp16")]; tensor var_763_cast_fp16 = softmax(axis = var_153, x = aw_chunk_109_cast_fp16)[name = tensor("op_763_cast_fp16")]; tensor var_764_cast_fp16 = softmax(axis = var_153, x = aw_chunk_111_cast_fp16)[name = tensor("op_764_cast_fp16")]; tensor var_765_cast_fp16 = softmax(axis = var_153, x = aw_chunk_113_cast_fp16)[name = tensor("op_765_cast_fp16")]; tensor var_766_cast_fp16 = softmax(axis = var_153, x = aw_chunk_115_cast_fp16)[name = tensor("op_766_cast_fp16")]; tensor var_767_cast_fp16 = softmax(axis = var_153, x = aw_chunk_117_cast_fp16)[name = tensor("op_767_cast_fp16")]; tensor var_768_cast_fp16 = softmax(axis = var_153, x = aw_chunk_119_cast_fp16)[name = tensor("op_768_cast_fp16")]; tensor var_769_cast_fp16 = softmax(axis = var_153, x = aw_chunk_121_cast_fp16)[name = tensor("op_769_cast_fp16")]; tensor var_770_cast_fp16 = softmax(axis = var_153, x = aw_chunk_123_cast_fp16)[name = tensor("op_770_cast_fp16")]; tensor var_771_cast_fp16 = softmax(axis = var_153, x = aw_chunk_125_cast_fp16)[name = tensor("op_771_cast_fp16")]; tensor var_772_cast_fp16 = softmax(axis = var_153, x = aw_chunk_127_cast_fp16)[name = tensor("op_772_cast_fp16")]; tensor var_773_cast_fp16 = softmax(axis = var_153, x = aw_chunk_129_cast_fp16)[name = tensor("op_773_cast_fp16")]; tensor var_774_cast_fp16 = softmax(axis = var_153, x = aw_chunk_131_cast_fp16)[name = tensor("op_774_cast_fp16")]; tensor var_775_cast_fp16 = softmax(axis = var_153, x = aw_chunk_133_cast_fp16)[name = tensor("op_775_cast_fp16")]; tensor var_776_cast_fp16 = softmax(axis = var_153, x = aw_chunk_135_cast_fp16)[name = tensor("op_776_cast_fp16")]; tensor var_777_cast_fp16 = softmax(axis = var_153, x = aw_chunk_137_cast_fp16)[name = tensor("op_777_cast_fp16")]; tensor var_778_cast_fp16 = softmax(axis = var_153, x = aw_chunk_139_cast_fp16)[name = tensor("op_778_cast_fp16")]; tensor var_779_cast_fp16 = softmax(axis = var_153, x = aw_chunk_141_cast_fp16)[name = tensor("op_779_cast_fp16")]; tensor var_780_cast_fp16 = softmax(axis = var_153, x = aw_chunk_143_cast_fp16)[name = tensor("op_780_cast_fp16")]; tensor var_782_equation_0 = const()[name = tensor("op_782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_782_cast_fp16 = einsum(equation = var_782_equation_0, values = (var_374_cast_fp16, var_709_cast_fp16))[name = tensor("op_782_cast_fp16")]; tensor var_784_equation_0 = const()[name = tensor("op_784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_784_cast_fp16 = einsum(equation = var_784_equation_0, values = (var_374_cast_fp16, var_710_cast_fp16))[name = tensor("op_784_cast_fp16")]; tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_374_cast_fp16, var_711_cast_fp16))[name = tensor("op_786_cast_fp16")]; tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_374_cast_fp16, var_712_cast_fp16))[name = tensor("op_788_cast_fp16")]; tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_374_cast_fp16, var_713_cast_fp16))[name = tensor("op_790_cast_fp16")]; tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_374_cast_fp16, var_714_cast_fp16))[name = tensor("op_792_cast_fp16")]; tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_378_cast_fp16, var_715_cast_fp16))[name = tensor("op_794_cast_fp16")]; tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_378_cast_fp16, var_716_cast_fp16))[name = tensor("op_796_cast_fp16")]; tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_378_cast_fp16, var_717_cast_fp16))[name = tensor("op_798_cast_fp16")]; tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_378_cast_fp16, var_718_cast_fp16))[name = tensor("op_800_cast_fp16")]; tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_378_cast_fp16, var_719_cast_fp16))[name = tensor("op_802_cast_fp16")]; tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_378_cast_fp16, var_720_cast_fp16))[name = tensor("op_804_cast_fp16")]; tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_382_cast_fp16, var_721_cast_fp16))[name = tensor("op_806_cast_fp16")]; tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_382_cast_fp16, var_722_cast_fp16))[name = tensor("op_808_cast_fp16")]; tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_382_cast_fp16, var_723_cast_fp16))[name = tensor("op_810_cast_fp16")]; tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_382_cast_fp16, var_724_cast_fp16))[name = tensor("op_812_cast_fp16")]; tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_382_cast_fp16, var_725_cast_fp16))[name = tensor("op_814_cast_fp16")]; tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_382_cast_fp16, var_726_cast_fp16))[name = tensor("op_816_cast_fp16")]; tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_386_cast_fp16, var_727_cast_fp16))[name = tensor("op_818_cast_fp16")]; tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_386_cast_fp16, var_728_cast_fp16))[name = tensor("op_820_cast_fp16")]; tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_386_cast_fp16, var_729_cast_fp16))[name = tensor("op_822_cast_fp16")]; tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_386_cast_fp16, var_730_cast_fp16))[name = tensor("op_824_cast_fp16")]; tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_386_cast_fp16, var_731_cast_fp16))[name = tensor("op_826_cast_fp16")]; tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_386_cast_fp16, var_732_cast_fp16))[name = tensor("op_828_cast_fp16")]; tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_390_cast_fp16, var_733_cast_fp16))[name = tensor("op_830_cast_fp16")]; tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_390_cast_fp16, var_734_cast_fp16))[name = tensor("op_832_cast_fp16")]; tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_390_cast_fp16, var_735_cast_fp16))[name = tensor("op_834_cast_fp16")]; tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_390_cast_fp16, var_736_cast_fp16))[name = tensor("op_836_cast_fp16")]; tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_390_cast_fp16, var_737_cast_fp16))[name = tensor("op_838_cast_fp16")]; tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_390_cast_fp16, var_738_cast_fp16))[name = tensor("op_840_cast_fp16")]; tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_394_cast_fp16, var_739_cast_fp16))[name = tensor("op_842_cast_fp16")]; tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_394_cast_fp16, var_740_cast_fp16))[name = tensor("op_844_cast_fp16")]; tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_394_cast_fp16, var_741_cast_fp16))[name = tensor("op_846_cast_fp16")]; tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_394_cast_fp16, var_742_cast_fp16))[name = tensor("op_848_cast_fp16")]; tensor var_850_equation_0 = const()[name = tensor("op_850_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_850_cast_fp16 = einsum(equation = var_850_equation_0, values = (var_394_cast_fp16, var_743_cast_fp16))[name = tensor("op_850_cast_fp16")]; tensor var_852_equation_0 = const()[name = tensor("op_852_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_852_cast_fp16 = einsum(equation = var_852_equation_0, values = (var_394_cast_fp16, var_744_cast_fp16))[name = tensor("op_852_cast_fp16")]; tensor var_854_equation_0 = const()[name = tensor("op_854_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_854_cast_fp16 = einsum(equation = var_854_equation_0, values = (var_398_cast_fp16, var_745_cast_fp16))[name = tensor("op_854_cast_fp16")]; tensor var_856_equation_0 = const()[name = tensor("op_856_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_856_cast_fp16 = einsum(equation = var_856_equation_0, values = (var_398_cast_fp16, var_746_cast_fp16))[name = tensor("op_856_cast_fp16")]; tensor var_858_equation_0 = const()[name = tensor("op_858_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_858_cast_fp16 = einsum(equation = var_858_equation_0, values = (var_398_cast_fp16, var_747_cast_fp16))[name = tensor("op_858_cast_fp16")]; tensor var_860_equation_0 = const()[name = tensor("op_860_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_860_cast_fp16 = einsum(equation = var_860_equation_0, values = (var_398_cast_fp16, var_748_cast_fp16))[name = tensor("op_860_cast_fp16")]; tensor var_862_equation_0 = const()[name = tensor("op_862_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_862_cast_fp16 = einsum(equation = var_862_equation_0, values = (var_398_cast_fp16, var_749_cast_fp16))[name = tensor("op_862_cast_fp16")]; tensor var_864_equation_0 = const()[name = tensor("op_864_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_864_cast_fp16 = einsum(equation = var_864_equation_0, values = (var_398_cast_fp16, var_750_cast_fp16))[name = tensor("op_864_cast_fp16")]; tensor var_866_equation_0 = const()[name = tensor("op_866_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_866_cast_fp16 = einsum(equation = var_866_equation_0, values = (var_402_cast_fp16, var_751_cast_fp16))[name = tensor("op_866_cast_fp16")]; tensor var_868_equation_0 = const()[name = tensor("op_868_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_868_cast_fp16 = einsum(equation = var_868_equation_0, values = (var_402_cast_fp16, var_752_cast_fp16))[name = tensor("op_868_cast_fp16")]; tensor var_870_equation_0 = const()[name = tensor("op_870_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_870_cast_fp16 = einsum(equation = var_870_equation_0, values = (var_402_cast_fp16, var_753_cast_fp16))[name = tensor("op_870_cast_fp16")]; tensor var_872_equation_0 = const()[name = tensor("op_872_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_872_cast_fp16 = einsum(equation = var_872_equation_0, values = (var_402_cast_fp16, var_754_cast_fp16))[name = tensor("op_872_cast_fp16")]; tensor var_874_equation_0 = const()[name = tensor("op_874_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_874_cast_fp16 = einsum(equation = var_874_equation_0, values = (var_402_cast_fp16, var_755_cast_fp16))[name = tensor("op_874_cast_fp16")]; tensor var_876_equation_0 = const()[name = tensor("op_876_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_876_cast_fp16 = einsum(equation = var_876_equation_0, values = (var_402_cast_fp16, var_756_cast_fp16))[name = tensor("op_876_cast_fp16")]; tensor var_878_equation_0 = const()[name = tensor("op_878_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_878_cast_fp16 = einsum(equation = var_878_equation_0, values = (var_406_cast_fp16, var_757_cast_fp16))[name = tensor("op_878_cast_fp16")]; tensor var_880_equation_0 = const()[name = tensor("op_880_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_880_cast_fp16 = einsum(equation = var_880_equation_0, values = (var_406_cast_fp16, var_758_cast_fp16))[name = tensor("op_880_cast_fp16")]; tensor var_882_equation_0 = const()[name = tensor("op_882_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_882_cast_fp16 = einsum(equation = var_882_equation_0, values = (var_406_cast_fp16, var_759_cast_fp16))[name = tensor("op_882_cast_fp16")]; tensor var_884_equation_0 = const()[name = tensor("op_884_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_884_cast_fp16 = einsum(equation = var_884_equation_0, values = (var_406_cast_fp16, var_760_cast_fp16))[name = tensor("op_884_cast_fp16")]; tensor var_886_equation_0 = const()[name = tensor("op_886_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_886_cast_fp16 = einsum(equation = var_886_equation_0, values = (var_406_cast_fp16, var_761_cast_fp16))[name = tensor("op_886_cast_fp16")]; tensor var_888_equation_0 = const()[name = tensor("op_888_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_888_cast_fp16 = einsum(equation = var_888_equation_0, values = (var_406_cast_fp16, var_762_cast_fp16))[name = tensor("op_888_cast_fp16")]; tensor var_890_equation_0 = const()[name = tensor("op_890_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_890_cast_fp16 = einsum(equation = var_890_equation_0, values = (var_410_cast_fp16, var_763_cast_fp16))[name = tensor("op_890_cast_fp16")]; tensor var_892_equation_0 = const()[name = tensor("op_892_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_892_cast_fp16 = einsum(equation = var_892_equation_0, values = (var_410_cast_fp16, var_764_cast_fp16))[name = tensor("op_892_cast_fp16")]; tensor var_894_equation_0 = const()[name = tensor("op_894_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_894_cast_fp16 = einsum(equation = var_894_equation_0, values = (var_410_cast_fp16, var_765_cast_fp16))[name = tensor("op_894_cast_fp16")]; tensor var_896_equation_0 = const()[name = tensor("op_896_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_896_cast_fp16 = einsum(equation = var_896_equation_0, values = (var_410_cast_fp16, var_766_cast_fp16))[name = tensor("op_896_cast_fp16")]; tensor var_898_equation_0 = const()[name = tensor("op_898_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_898_cast_fp16 = einsum(equation = var_898_equation_0, values = (var_410_cast_fp16, var_767_cast_fp16))[name = tensor("op_898_cast_fp16")]; tensor var_900_equation_0 = const()[name = tensor("op_900_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_900_cast_fp16 = einsum(equation = var_900_equation_0, values = (var_410_cast_fp16, var_768_cast_fp16))[name = tensor("op_900_cast_fp16")]; tensor var_902_equation_0 = const()[name = tensor("op_902_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_902_cast_fp16 = einsum(equation = var_902_equation_0, values = (var_414_cast_fp16, var_769_cast_fp16))[name = tensor("op_902_cast_fp16")]; tensor var_904_equation_0 = const()[name = tensor("op_904_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_904_cast_fp16 = einsum(equation = var_904_equation_0, values = (var_414_cast_fp16, var_770_cast_fp16))[name = tensor("op_904_cast_fp16")]; tensor var_906_equation_0 = const()[name = tensor("op_906_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_906_cast_fp16 = einsum(equation = var_906_equation_0, values = (var_414_cast_fp16, var_771_cast_fp16))[name = tensor("op_906_cast_fp16")]; tensor var_908_equation_0 = const()[name = tensor("op_908_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_908_cast_fp16 = einsum(equation = var_908_equation_0, values = (var_414_cast_fp16, var_772_cast_fp16))[name = tensor("op_908_cast_fp16")]; tensor var_910_equation_0 = const()[name = tensor("op_910_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_910_cast_fp16 = einsum(equation = var_910_equation_0, values = (var_414_cast_fp16, var_773_cast_fp16))[name = tensor("op_910_cast_fp16")]; tensor var_912_equation_0 = const()[name = tensor("op_912_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_912_cast_fp16 = einsum(equation = var_912_equation_0, values = (var_414_cast_fp16, var_774_cast_fp16))[name = tensor("op_912_cast_fp16")]; tensor var_914_equation_0 = const()[name = tensor("op_914_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_914_cast_fp16 = einsum(equation = var_914_equation_0, values = (var_418_cast_fp16, var_775_cast_fp16))[name = tensor("op_914_cast_fp16")]; tensor var_916_equation_0 = const()[name = tensor("op_916_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_916_cast_fp16 = einsum(equation = var_916_equation_0, values = (var_418_cast_fp16, var_776_cast_fp16))[name = tensor("op_916_cast_fp16")]; tensor var_918_equation_0 = const()[name = tensor("op_918_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_918_cast_fp16 = einsum(equation = var_918_equation_0, values = (var_418_cast_fp16, var_777_cast_fp16))[name = tensor("op_918_cast_fp16")]; tensor var_920_equation_0 = const()[name = tensor("op_920_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_920_cast_fp16 = einsum(equation = var_920_equation_0, values = (var_418_cast_fp16, var_778_cast_fp16))[name = tensor("op_920_cast_fp16")]; tensor var_922_equation_0 = const()[name = tensor("op_922_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_922_cast_fp16 = einsum(equation = var_922_equation_0, values = (var_418_cast_fp16, var_779_cast_fp16))[name = tensor("op_922_cast_fp16")]; tensor var_924_equation_0 = const()[name = tensor("op_924_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_924_cast_fp16 = einsum(equation = var_924_equation_0, values = (var_418_cast_fp16, var_780_cast_fp16))[name = tensor("op_924_cast_fp16")]; tensor var_926_interleave_0 = const()[name = tensor("op_926_interleave_0"), val = tensor(false)]; tensor var_926_cast_fp16 = concat(axis = var_137, interleave = var_926_interleave_0, values = (var_782_cast_fp16, var_784_cast_fp16, var_786_cast_fp16, var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16))[name = tensor("op_926_cast_fp16")]; tensor var_928_interleave_0 = const()[name = tensor("op_928_interleave_0"), val = tensor(false)]; tensor var_928_cast_fp16 = concat(axis = var_137, interleave = var_928_interleave_0, values = (var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16, var_804_cast_fp16))[name = tensor("op_928_cast_fp16")]; tensor var_930_interleave_0 = const()[name = tensor("op_930_interleave_0"), val = tensor(false)]; tensor var_930_cast_fp16 = concat(axis = var_137, interleave = var_930_interleave_0, values = (var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16))[name = tensor("op_930_cast_fp16")]; tensor var_932_interleave_0 = const()[name = tensor("op_932_interleave_0"), val = tensor(false)]; tensor var_932_cast_fp16 = concat(axis = var_137, interleave = var_932_interleave_0, values = (var_818_cast_fp16, var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16))[name = tensor("op_932_cast_fp16")]; tensor var_934_interleave_0 = const()[name = tensor("op_934_interleave_0"), val = tensor(false)]; tensor var_934_cast_fp16 = concat(axis = var_137, interleave = var_934_interleave_0, values = (var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16))[name = tensor("op_934_cast_fp16")]; tensor var_936_interleave_0 = const()[name = tensor("op_936_interleave_0"), val = tensor(false)]; tensor var_936_cast_fp16 = concat(axis = var_137, interleave = var_936_interleave_0, values = (var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16, var_850_cast_fp16, var_852_cast_fp16))[name = tensor("op_936_cast_fp16")]; tensor var_938_interleave_0 = const()[name = tensor("op_938_interleave_0"), val = tensor(false)]; tensor var_938_cast_fp16 = concat(axis = var_137, interleave = var_938_interleave_0, values = (var_854_cast_fp16, var_856_cast_fp16, var_858_cast_fp16, var_860_cast_fp16, var_862_cast_fp16, var_864_cast_fp16))[name = tensor("op_938_cast_fp16")]; tensor var_940_interleave_0 = const()[name = tensor("op_940_interleave_0"), val = tensor(false)]; tensor var_940_cast_fp16 = concat(axis = var_137, interleave = var_940_interleave_0, values = (var_866_cast_fp16, var_868_cast_fp16, var_870_cast_fp16, var_872_cast_fp16, var_874_cast_fp16, var_876_cast_fp16))[name = tensor("op_940_cast_fp16")]; tensor var_942_interleave_0 = const()[name = tensor("op_942_interleave_0"), val = tensor(false)]; tensor var_942_cast_fp16 = concat(axis = var_137, interleave = var_942_interleave_0, values = (var_878_cast_fp16, var_880_cast_fp16, var_882_cast_fp16, var_884_cast_fp16, var_886_cast_fp16, var_888_cast_fp16))[name = tensor("op_942_cast_fp16")]; tensor var_944_interleave_0 = const()[name = tensor("op_944_interleave_0"), val = tensor(false)]; tensor var_944_cast_fp16 = concat(axis = var_137, interleave = var_944_interleave_0, values = (var_890_cast_fp16, var_892_cast_fp16, var_894_cast_fp16, var_896_cast_fp16, var_898_cast_fp16, var_900_cast_fp16))[name = tensor("op_944_cast_fp16")]; tensor var_946_interleave_0 = const()[name = tensor("op_946_interleave_0"), val = tensor(false)]; tensor var_946_cast_fp16 = concat(axis = var_137, interleave = var_946_interleave_0, values = (var_902_cast_fp16, var_904_cast_fp16, var_906_cast_fp16, var_908_cast_fp16, var_910_cast_fp16, var_912_cast_fp16))[name = tensor("op_946_cast_fp16")]; tensor var_948_interleave_0 = const()[name = tensor("op_948_interleave_0"), val = tensor(false)]; tensor var_948_cast_fp16 = concat(axis = var_137, interleave = var_948_interleave_0, values = (var_914_cast_fp16, var_916_cast_fp16, var_918_cast_fp16, var_920_cast_fp16, var_922_cast_fp16, var_924_cast_fp16))[name = tensor("op_948_cast_fp16")]; tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; tensor input_1_cast_fp16 = concat(axis = var_153, interleave = input_1_interleave_0, values = (var_926_cast_fp16, var_928_cast_fp16, var_930_cast_fp16, var_932_cast_fp16, var_934_cast_fp16, var_936_cast_fp16, var_938_cast_fp16, var_940_cast_fp16, var_942_cast_fp16, var_944_cast_fp16, var_946_cast_fp16, var_948_cast_fp16))[name = tensor("input_1_cast_fp16")]; tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("valid")]; tensor obj_3_strides_0 = const()[name = tensor("obj_3_strides_0"), val = tensor([1, 1])]; tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_3_dilations_0 = const()[name = tensor("obj_3_dilations_0"), val = tensor([1, 1])]; tensor obj_3_groups_0 = const()[name = tensor("obj_3_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9763776)))]; tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10943488)))]; tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_3_dilations_0, groups = obj_3_groups_0, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = obj_3_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; tensor var_967_to_fp16 = const()[name = tensor("op_967_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_967_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10945088)))]; tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10946688)))]; tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("valid")]; tensor input_5_strides_0 = const()[name = tensor("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = tensor("input_5_dilations_0"), val = tensor([1, 1])]; tensor input_5_groups_0 = const()[name = tensor("input_5_groups_0"), val = tensor(1)]; tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10948288)))]; tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15666944)))]; tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("valid")]; tensor hidden_states_5_strides_0 = const()[name = tensor("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = tensor("hidden_states_5_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_5_groups_0 = const()[name = tensor("hidden_states_5_groups_0"), val = tensor(1)]; tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15673152)))]; tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20391808)))]; tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; tensor var_999 = const()[name = tensor("op_999"), val = tensor(3)]; tensor var_1015 = const()[name = tensor("op_1015"), val = tensor(1)]; tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; tensor var_1032_to_fp16 = const()[name = tensor("op_1032_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_1032_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20393408)))]; tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20395008)))]; tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("valid")]; tensor query_3_strides_0 = const()[name = tensor("query_3_strides_0"), val = tensor([1, 1])]; tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_3_dilations_0 = const()[name = tensor("query_3_dilations_0"), val = tensor([1, 1])]; tensor query_3_groups_0 = const()[name = tensor("query_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20396608)))]; tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21576320)))]; tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("valid")]; tensor key_3_strides_0 = const()[name = tensor("key_3_strides_0"), val = tensor([1, 1])]; tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_3_dilations_0 = const()[name = tensor("key_3_dilations_0"), val = tensor([1, 1])]; tensor key_3_groups_0 = const()[name = tensor("key_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21577920)))]; tensor key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("valid")]; tensor value_3_strides_0 = const()[name = tensor("value_3_strides_0"), val = tensor([1, 1])]; tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_3_dilations_0 = const()[name = tensor("value_3_dilations_0"), val = tensor([1, 1])]; tensor value_3_groups_0 = const()[name = tensor("value_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22757632)))]; tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23937344)))]; tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; tensor var_1067_begin_0 = const()[name = tensor("op_1067_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1067_end_0 = const()[name = tensor("op_1067_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1067_end_mask_0 = const()[name = tensor("op_1067_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1067_cast_fp16 = slice_by_index(begin = var_1067_begin_0, end = var_1067_end_0, end_mask = var_1067_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1067_cast_fp16")]; tensor var_1071_begin_0 = const()[name = tensor("op_1071_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1071_end_0 = const()[name = tensor("op_1071_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1071_end_mask_0 = const()[name = tensor("op_1071_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1071_cast_fp16 = slice_by_index(begin = var_1071_begin_0, end = var_1071_end_0, end_mask = var_1071_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1071_cast_fp16")]; tensor var_1075_begin_0 = const()[name = tensor("op_1075_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1075_end_0 = const()[name = tensor("op_1075_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1075_end_mask_0 = const()[name = tensor("op_1075_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1075_cast_fp16 = slice_by_index(begin = var_1075_begin_0, end = var_1075_end_0, end_mask = var_1075_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1075_cast_fp16")]; tensor var_1079_begin_0 = const()[name = tensor("op_1079_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1079_end_0 = const()[name = tensor("op_1079_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1079_end_mask_0 = const()[name = tensor("op_1079_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1079_cast_fp16 = slice_by_index(begin = var_1079_begin_0, end = var_1079_end_0, end_mask = var_1079_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1079_cast_fp16")]; tensor var_1083_begin_0 = const()[name = tensor("op_1083_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1083_end_0 = const()[name = tensor("op_1083_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1083_end_mask_0 = const()[name = tensor("op_1083_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = var_1083_end_0, end_mask = var_1083_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1083_cast_fp16")]; tensor var_1087_begin_0 = const()[name = tensor("op_1087_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1087_end_0 = const()[name = tensor("op_1087_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1087_end_mask_0 = const()[name = tensor("op_1087_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1087_cast_fp16 = slice_by_index(begin = var_1087_begin_0, end = var_1087_end_0, end_mask = var_1087_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1087_cast_fp16")]; tensor var_1091_begin_0 = const()[name = tensor("op_1091_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1091_end_0 = const()[name = tensor("op_1091_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1091_end_mask_0 = const()[name = tensor("op_1091_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1091_cast_fp16 = slice_by_index(begin = var_1091_begin_0, end = var_1091_end_0, end_mask = var_1091_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1091_cast_fp16")]; tensor var_1095_begin_0 = const()[name = tensor("op_1095_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1095_end_0 = const()[name = tensor("op_1095_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_1095_end_mask_0 = const()[name = tensor("op_1095_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1095_cast_fp16 = slice_by_index(begin = var_1095_begin_0, end = var_1095_end_0, end_mask = var_1095_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1095_cast_fp16")]; tensor var_1099_begin_0 = const()[name = tensor("op_1099_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_1099_end_0 = const()[name = tensor("op_1099_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_1099_end_mask_0 = const()[name = tensor("op_1099_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1099_cast_fp16 = slice_by_index(begin = var_1099_begin_0, end = var_1099_end_0, end_mask = var_1099_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1099_cast_fp16")]; tensor var_1103_begin_0 = const()[name = tensor("op_1103_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_1103_end_0 = const()[name = tensor("op_1103_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_1103_end_mask_0 = const()[name = tensor("op_1103_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1103_cast_fp16 = slice_by_index(begin = var_1103_begin_0, end = var_1103_end_0, end_mask = var_1103_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1103_cast_fp16")]; tensor var_1107_begin_0 = const()[name = tensor("op_1107_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_1107_end_0 = const()[name = tensor("op_1107_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_1107_end_mask_0 = const()[name = tensor("op_1107_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1107_cast_fp16 = slice_by_index(begin = var_1107_begin_0, end = var_1107_end_0, end_mask = var_1107_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1107_cast_fp16")]; tensor var_1111_begin_0 = const()[name = tensor("op_1111_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_1111_end_0 = const()[name = tensor("op_1111_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1111_end_mask_0 = const()[name = tensor("op_1111_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1111_cast_fp16 = slice_by_index(begin = var_1111_begin_0, end = var_1111_end_0, end_mask = var_1111_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1111_cast_fp16")]; tensor var_1114_begin_0 = const()[name = tensor("op_1114_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1114_end_0 = const()[name = tensor("op_1114_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1114_end_mask_0 = const()[name = tensor("op_1114_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1114_cast_fp16 = slice_by_index(begin = var_1114_begin_0, end = var_1114_end_0, end_mask = var_1114_end_mask_0, x = var_1067_cast_fp16)[name = tensor("op_1114_cast_fp16")]; tensor var_1115_begin_0 = const()[name = tensor("op_1115_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1115_end_0 = const()[name = tensor("op_1115_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1115_end_mask_0 = const()[name = tensor("op_1115_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1115_cast_fp16 = slice_by_index(begin = var_1115_begin_0, end = var_1115_end_0, end_mask = var_1115_end_mask_0, x = var_1067_cast_fp16)[name = tensor("op_1115_cast_fp16")]; tensor var_1116_begin_0 = const()[name = tensor("op_1116_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1116_end_0 = const()[name = tensor("op_1116_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1116_end_mask_0 = const()[name = tensor("op_1116_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1116_cast_fp16 = slice_by_index(begin = var_1116_begin_0, end = var_1116_end_0, end_mask = var_1116_end_mask_0, x = var_1067_cast_fp16)[name = tensor("op_1116_cast_fp16")]; tensor var_1117_begin_0 = const()[name = tensor("op_1117_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1117_end_0 = const()[name = tensor("op_1117_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1117_end_mask_0 = const()[name = tensor("op_1117_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1117_cast_fp16 = slice_by_index(begin = var_1117_begin_0, end = var_1117_end_0, end_mask = var_1117_end_mask_0, x = var_1067_cast_fp16)[name = tensor("op_1117_cast_fp16")]; tensor var_1118_begin_0 = const()[name = tensor("op_1118_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1118_end_0 = const()[name = tensor("op_1118_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1118_end_mask_0 = const()[name = tensor("op_1118_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1118_cast_fp16 = slice_by_index(begin = var_1118_begin_0, end = var_1118_end_0, end_mask = var_1118_end_mask_0, x = var_1067_cast_fp16)[name = tensor("op_1118_cast_fp16")]; tensor var_1119_begin_0 = const()[name = tensor("op_1119_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1119_end_0 = const()[name = tensor("op_1119_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1119_end_mask_0 = const()[name = tensor("op_1119_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1119_cast_fp16 = slice_by_index(begin = var_1119_begin_0, end = var_1119_end_0, end_mask = var_1119_end_mask_0, x = var_1067_cast_fp16)[name = tensor("op_1119_cast_fp16")]; tensor var_1120_begin_0 = const()[name = tensor("op_1120_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1120_end_0 = const()[name = tensor("op_1120_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1120_end_mask_0 = const()[name = tensor("op_1120_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1120_cast_fp16 = slice_by_index(begin = var_1120_begin_0, end = var_1120_end_0, end_mask = var_1120_end_mask_0, x = var_1071_cast_fp16)[name = tensor("op_1120_cast_fp16")]; tensor var_1121_begin_0 = const()[name = tensor("op_1121_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1121_end_0 = const()[name = tensor("op_1121_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1121_end_mask_0 = const()[name = tensor("op_1121_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1121_cast_fp16 = slice_by_index(begin = var_1121_begin_0, end = var_1121_end_0, end_mask = var_1121_end_mask_0, x = var_1071_cast_fp16)[name = tensor("op_1121_cast_fp16")]; tensor var_1122_begin_0 = const()[name = tensor("op_1122_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1122_end_0 = const()[name = tensor("op_1122_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1122_end_mask_0 = const()[name = tensor("op_1122_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1122_cast_fp16 = slice_by_index(begin = var_1122_begin_0, end = var_1122_end_0, end_mask = var_1122_end_mask_0, x = var_1071_cast_fp16)[name = tensor("op_1122_cast_fp16")]; tensor var_1123_begin_0 = const()[name = tensor("op_1123_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1123_end_0 = const()[name = tensor("op_1123_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1123_end_mask_0 = const()[name = tensor("op_1123_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1123_cast_fp16 = slice_by_index(begin = var_1123_begin_0, end = var_1123_end_0, end_mask = var_1123_end_mask_0, x = var_1071_cast_fp16)[name = tensor("op_1123_cast_fp16")]; tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = var_1071_cast_fp16)[name = tensor("op_1124_cast_fp16")]; tensor var_1125_begin_0 = const()[name = tensor("op_1125_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1125_end_0 = const()[name = tensor("op_1125_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1125_end_mask_0 = const()[name = tensor("op_1125_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1125_cast_fp16 = slice_by_index(begin = var_1125_begin_0, end = var_1125_end_0, end_mask = var_1125_end_mask_0, x = var_1071_cast_fp16)[name = tensor("op_1125_cast_fp16")]; tensor var_1126_begin_0 = const()[name = tensor("op_1126_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1126_end_0 = const()[name = tensor("op_1126_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1126_end_mask_0 = const()[name = tensor("op_1126_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1126_cast_fp16 = slice_by_index(begin = var_1126_begin_0, end = var_1126_end_0, end_mask = var_1126_end_mask_0, x = var_1075_cast_fp16)[name = tensor("op_1126_cast_fp16")]; tensor var_1127_begin_0 = const()[name = tensor("op_1127_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1127_end_0 = const()[name = tensor("op_1127_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1127_end_mask_0 = const()[name = tensor("op_1127_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1127_cast_fp16 = slice_by_index(begin = var_1127_begin_0, end = var_1127_end_0, end_mask = var_1127_end_mask_0, x = var_1075_cast_fp16)[name = tensor("op_1127_cast_fp16")]; tensor var_1128_begin_0 = const()[name = tensor("op_1128_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1128_end_0 = const()[name = tensor("op_1128_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1128_end_mask_0 = const()[name = tensor("op_1128_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1128_cast_fp16 = slice_by_index(begin = var_1128_begin_0, end = var_1128_end_0, end_mask = var_1128_end_mask_0, x = var_1075_cast_fp16)[name = tensor("op_1128_cast_fp16")]; tensor var_1129_begin_0 = const()[name = tensor("op_1129_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1129_end_0 = const()[name = tensor("op_1129_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1129_end_mask_0 = const()[name = tensor("op_1129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1129_cast_fp16 = slice_by_index(begin = var_1129_begin_0, end = var_1129_end_0, end_mask = var_1129_end_mask_0, x = var_1075_cast_fp16)[name = tensor("op_1129_cast_fp16")]; tensor var_1130_begin_0 = const()[name = tensor("op_1130_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1130_end_0 = const()[name = tensor("op_1130_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1130_end_mask_0 = const()[name = tensor("op_1130_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = var_1130_end_0, end_mask = var_1130_end_mask_0, x = var_1075_cast_fp16)[name = tensor("op_1130_cast_fp16")]; tensor var_1131_begin_0 = const()[name = tensor("op_1131_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1131_end_0 = const()[name = tensor("op_1131_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1131_end_mask_0 = const()[name = tensor("op_1131_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1131_cast_fp16 = slice_by_index(begin = var_1131_begin_0, end = var_1131_end_0, end_mask = var_1131_end_mask_0, x = var_1075_cast_fp16)[name = tensor("op_1131_cast_fp16")]; tensor var_1132_begin_0 = const()[name = tensor("op_1132_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1132_end_0 = const()[name = tensor("op_1132_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1132_end_mask_0 = const()[name = tensor("op_1132_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1132_cast_fp16 = slice_by_index(begin = var_1132_begin_0, end = var_1132_end_0, end_mask = var_1132_end_mask_0, x = var_1079_cast_fp16)[name = tensor("op_1132_cast_fp16")]; tensor var_1133_begin_0 = const()[name = tensor("op_1133_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1133_end_0 = const()[name = tensor("op_1133_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1133_end_mask_0 = const()[name = tensor("op_1133_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1133_cast_fp16 = slice_by_index(begin = var_1133_begin_0, end = var_1133_end_0, end_mask = var_1133_end_mask_0, x = var_1079_cast_fp16)[name = tensor("op_1133_cast_fp16")]; tensor var_1134_begin_0 = const()[name = tensor("op_1134_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1134_end_0 = const()[name = tensor("op_1134_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1134_end_mask_0 = const()[name = tensor("op_1134_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1134_cast_fp16 = slice_by_index(begin = var_1134_begin_0, end = var_1134_end_0, end_mask = var_1134_end_mask_0, x = var_1079_cast_fp16)[name = tensor("op_1134_cast_fp16")]; tensor var_1135_begin_0 = const()[name = tensor("op_1135_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1135_end_0 = const()[name = tensor("op_1135_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1135_end_mask_0 = const()[name = tensor("op_1135_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1135_cast_fp16 = slice_by_index(begin = var_1135_begin_0, end = var_1135_end_0, end_mask = var_1135_end_mask_0, x = var_1079_cast_fp16)[name = tensor("op_1135_cast_fp16")]; tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = var_1079_cast_fp16)[name = tensor("op_1136_cast_fp16")]; tensor var_1137_begin_0 = const()[name = tensor("op_1137_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1137_end_0 = const()[name = tensor("op_1137_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1137_end_mask_0 = const()[name = tensor("op_1137_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1137_cast_fp16 = slice_by_index(begin = var_1137_begin_0, end = var_1137_end_0, end_mask = var_1137_end_mask_0, x = var_1079_cast_fp16)[name = tensor("op_1137_cast_fp16")]; tensor var_1138_begin_0 = const()[name = tensor("op_1138_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1138_end_0 = const()[name = tensor("op_1138_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1138_end_mask_0 = const()[name = tensor("op_1138_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1138_cast_fp16 = slice_by_index(begin = var_1138_begin_0, end = var_1138_end_0, end_mask = var_1138_end_mask_0, x = var_1083_cast_fp16)[name = tensor("op_1138_cast_fp16")]; tensor var_1139_begin_0 = const()[name = tensor("op_1139_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1139_end_0 = const()[name = tensor("op_1139_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1139_end_mask_0 = const()[name = tensor("op_1139_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1139_cast_fp16 = slice_by_index(begin = var_1139_begin_0, end = var_1139_end_0, end_mask = var_1139_end_mask_0, x = var_1083_cast_fp16)[name = tensor("op_1139_cast_fp16")]; tensor var_1140_begin_0 = const()[name = tensor("op_1140_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1140_end_0 = const()[name = tensor("op_1140_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1140_end_mask_0 = const()[name = tensor("op_1140_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1140_cast_fp16 = slice_by_index(begin = var_1140_begin_0, end = var_1140_end_0, end_mask = var_1140_end_mask_0, x = var_1083_cast_fp16)[name = tensor("op_1140_cast_fp16")]; tensor var_1141_begin_0 = const()[name = tensor("op_1141_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1141_end_0 = const()[name = tensor("op_1141_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1141_end_mask_0 = const()[name = tensor("op_1141_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1141_cast_fp16 = slice_by_index(begin = var_1141_begin_0, end = var_1141_end_0, end_mask = var_1141_end_mask_0, x = var_1083_cast_fp16)[name = tensor("op_1141_cast_fp16")]; tensor var_1142_begin_0 = const()[name = tensor("op_1142_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1142_end_0 = const()[name = tensor("op_1142_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1142_end_mask_0 = const()[name = tensor("op_1142_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1142_cast_fp16 = slice_by_index(begin = var_1142_begin_0, end = var_1142_end_0, end_mask = var_1142_end_mask_0, x = var_1083_cast_fp16)[name = tensor("op_1142_cast_fp16")]; tensor var_1143_begin_0 = const()[name = tensor("op_1143_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1143_end_0 = const()[name = tensor("op_1143_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1143_end_mask_0 = const()[name = tensor("op_1143_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1143_cast_fp16 = slice_by_index(begin = var_1143_begin_0, end = var_1143_end_0, end_mask = var_1143_end_mask_0, x = var_1083_cast_fp16)[name = tensor("op_1143_cast_fp16")]; tensor var_1144_begin_0 = const()[name = tensor("op_1144_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1144_end_0 = const()[name = tensor("op_1144_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1144_end_mask_0 = const()[name = tensor("op_1144_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1144_cast_fp16 = slice_by_index(begin = var_1144_begin_0, end = var_1144_end_0, end_mask = var_1144_end_mask_0, x = var_1087_cast_fp16)[name = tensor("op_1144_cast_fp16")]; tensor var_1145_begin_0 = const()[name = tensor("op_1145_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1145_end_0 = const()[name = tensor("op_1145_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1145_end_mask_0 = const()[name = tensor("op_1145_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1145_cast_fp16 = slice_by_index(begin = var_1145_begin_0, end = var_1145_end_0, end_mask = var_1145_end_mask_0, x = var_1087_cast_fp16)[name = tensor("op_1145_cast_fp16")]; tensor var_1146_begin_0 = const()[name = tensor("op_1146_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1146_end_0 = const()[name = tensor("op_1146_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1146_end_mask_0 = const()[name = tensor("op_1146_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = var_1087_cast_fp16)[name = tensor("op_1146_cast_fp16")]; tensor var_1147_begin_0 = const()[name = tensor("op_1147_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1147_end_0 = const()[name = tensor("op_1147_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1147_end_mask_0 = const()[name = tensor("op_1147_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1147_cast_fp16 = slice_by_index(begin = var_1147_begin_0, end = var_1147_end_0, end_mask = var_1147_end_mask_0, x = var_1087_cast_fp16)[name = tensor("op_1147_cast_fp16")]; tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = var_1087_cast_fp16)[name = tensor("op_1148_cast_fp16")]; tensor var_1149_begin_0 = const()[name = tensor("op_1149_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1149_end_0 = const()[name = tensor("op_1149_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1149_end_mask_0 = const()[name = tensor("op_1149_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1149_cast_fp16 = slice_by_index(begin = var_1149_begin_0, end = var_1149_end_0, end_mask = var_1149_end_mask_0, x = var_1087_cast_fp16)[name = tensor("op_1149_cast_fp16")]; tensor var_1150_begin_0 = const()[name = tensor("op_1150_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1150_end_0 = const()[name = tensor("op_1150_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1150_end_mask_0 = const()[name = tensor("op_1150_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1150_cast_fp16 = slice_by_index(begin = var_1150_begin_0, end = var_1150_end_0, end_mask = var_1150_end_mask_0, x = var_1091_cast_fp16)[name = tensor("op_1150_cast_fp16")]; tensor var_1151_begin_0 = const()[name = tensor("op_1151_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1151_end_0 = const()[name = tensor("op_1151_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1151_end_mask_0 = const()[name = tensor("op_1151_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1151_cast_fp16 = slice_by_index(begin = var_1151_begin_0, end = var_1151_end_0, end_mask = var_1151_end_mask_0, x = var_1091_cast_fp16)[name = tensor("op_1151_cast_fp16")]; tensor var_1152_begin_0 = const()[name = tensor("op_1152_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1152_end_0 = const()[name = tensor("op_1152_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1152_end_mask_0 = const()[name = tensor("op_1152_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1152_cast_fp16 = slice_by_index(begin = var_1152_begin_0, end = var_1152_end_0, end_mask = var_1152_end_mask_0, x = var_1091_cast_fp16)[name = tensor("op_1152_cast_fp16")]; tensor var_1153_begin_0 = const()[name = tensor("op_1153_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1153_end_0 = const()[name = tensor("op_1153_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1153_end_mask_0 = const()[name = tensor("op_1153_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1153_cast_fp16 = slice_by_index(begin = var_1153_begin_0, end = var_1153_end_0, end_mask = var_1153_end_mask_0, x = var_1091_cast_fp16)[name = tensor("op_1153_cast_fp16")]; tensor var_1154_begin_0 = const()[name = tensor("op_1154_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1154_end_0 = const()[name = tensor("op_1154_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1154_end_mask_0 = const()[name = tensor("op_1154_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1154_cast_fp16 = slice_by_index(begin = var_1154_begin_0, end = var_1154_end_0, end_mask = var_1154_end_mask_0, x = var_1091_cast_fp16)[name = tensor("op_1154_cast_fp16")]; tensor var_1155_begin_0 = const()[name = tensor("op_1155_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1155_end_0 = const()[name = tensor("op_1155_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1155_end_mask_0 = const()[name = tensor("op_1155_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1155_cast_fp16 = slice_by_index(begin = var_1155_begin_0, end = var_1155_end_0, end_mask = var_1155_end_mask_0, x = var_1091_cast_fp16)[name = tensor("op_1155_cast_fp16")]; tensor var_1156_begin_0 = const()[name = tensor("op_1156_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1156_end_0 = const()[name = tensor("op_1156_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1156_end_mask_0 = const()[name = tensor("op_1156_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1156_cast_fp16 = slice_by_index(begin = var_1156_begin_0, end = var_1156_end_0, end_mask = var_1156_end_mask_0, x = var_1095_cast_fp16)[name = tensor("op_1156_cast_fp16")]; tensor var_1157_begin_0 = const()[name = tensor("op_1157_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1157_end_0 = const()[name = tensor("op_1157_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1157_end_mask_0 = const()[name = tensor("op_1157_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1157_cast_fp16 = slice_by_index(begin = var_1157_begin_0, end = var_1157_end_0, end_mask = var_1157_end_mask_0, x = var_1095_cast_fp16)[name = tensor("op_1157_cast_fp16")]; tensor var_1158_begin_0 = const()[name = tensor("op_1158_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1158_end_0 = const()[name = tensor("op_1158_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1158_end_mask_0 = const()[name = tensor("op_1158_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1158_cast_fp16 = slice_by_index(begin = var_1158_begin_0, end = var_1158_end_0, end_mask = var_1158_end_mask_0, x = var_1095_cast_fp16)[name = tensor("op_1158_cast_fp16")]; tensor var_1159_begin_0 = const()[name = tensor("op_1159_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1159_end_0 = const()[name = tensor("op_1159_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1159_end_mask_0 = const()[name = tensor("op_1159_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1159_cast_fp16 = slice_by_index(begin = var_1159_begin_0, end = var_1159_end_0, end_mask = var_1159_end_mask_0, x = var_1095_cast_fp16)[name = tensor("op_1159_cast_fp16")]; tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = var_1095_cast_fp16)[name = tensor("op_1160_cast_fp16")]; tensor var_1161_begin_0 = const()[name = tensor("op_1161_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1161_end_0 = const()[name = tensor("op_1161_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1161_end_mask_0 = const()[name = tensor("op_1161_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1161_cast_fp16 = slice_by_index(begin = var_1161_begin_0, end = var_1161_end_0, end_mask = var_1161_end_mask_0, x = var_1095_cast_fp16)[name = tensor("op_1161_cast_fp16")]; tensor var_1162_begin_0 = const()[name = tensor("op_1162_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1162_end_0 = const()[name = tensor("op_1162_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1162_end_mask_0 = const()[name = tensor("op_1162_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1162_cast_fp16 = slice_by_index(begin = var_1162_begin_0, end = var_1162_end_0, end_mask = var_1162_end_mask_0, x = var_1099_cast_fp16)[name = tensor("op_1162_cast_fp16")]; tensor var_1163_begin_0 = const()[name = tensor("op_1163_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1163_end_0 = const()[name = tensor("op_1163_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1163_end_mask_0 = const()[name = tensor("op_1163_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1163_cast_fp16 = slice_by_index(begin = var_1163_begin_0, end = var_1163_end_0, end_mask = var_1163_end_mask_0, x = var_1099_cast_fp16)[name = tensor("op_1163_cast_fp16")]; tensor var_1164_begin_0 = const()[name = tensor("op_1164_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1164_end_0 = const()[name = tensor("op_1164_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1164_end_mask_0 = const()[name = tensor("op_1164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1164_cast_fp16 = slice_by_index(begin = var_1164_begin_0, end = var_1164_end_0, end_mask = var_1164_end_mask_0, x = var_1099_cast_fp16)[name = tensor("op_1164_cast_fp16")]; tensor var_1165_begin_0 = const()[name = tensor("op_1165_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1165_end_0 = const()[name = tensor("op_1165_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1165_end_mask_0 = const()[name = tensor("op_1165_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1165_cast_fp16 = slice_by_index(begin = var_1165_begin_0, end = var_1165_end_0, end_mask = var_1165_end_mask_0, x = var_1099_cast_fp16)[name = tensor("op_1165_cast_fp16")]; tensor var_1166_begin_0 = const()[name = tensor("op_1166_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1166_end_0 = const()[name = tensor("op_1166_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1166_end_mask_0 = const()[name = tensor("op_1166_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1166_cast_fp16 = slice_by_index(begin = var_1166_begin_0, end = var_1166_end_0, end_mask = var_1166_end_mask_0, x = var_1099_cast_fp16)[name = tensor("op_1166_cast_fp16")]; tensor var_1167_begin_0 = const()[name = tensor("op_1167_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1167_end_0 = const()[name = tensor("op_1167_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1167_end_mask_0 = const()[name = tensor("op_1167_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1167_cast_fp16 = slice_by_index(begin = var_1167_begin_0, end = var_1167_end_0, end_mask = var_1167_end_mask_0, x = var_1099_cast_fp16)[name = tensor("op_1167_cast_fp16")]; tensor var_1168_begin_0 = const()[name = tensor("op_1168_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1168_end_0 = const()[name = tensor("op_1168_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1168_end_mask_0 = const()[name = tensor("op_1168_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1168_cast_fp16 = slice_by_index(begin = var_1168_begin_0, end = var_1168_end_0, end_mask = var_1168_end_mask_0, x = var_1103_cast_fp16)[name = tensor("op_1168_cast_fp16")]; tensor var_1169_begin_0 = const()[name = tensor("op_1169_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1169_end_0 = const()[name = tensor("op_1169_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1169_end_mask_0 = const()[name = tensor("op_1169_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1169_cast_fp16 = slice_by_index(begin = var_1169_begin_0, end = var_1169_end_0, end_mask = var_1169_end_mask_0, x = var_1103_cast_fp16)[name = tensor("op_1169_cast_fp16")]; tensor var_1170_begin_0 = const()[name = tensor("op_1170_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1170_end_0 = const()[name = tensor("op_1170_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1170_end_mask_0 = const()[name = tensor("op_1170_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1170_cast_fp16 = slice_by_index(begin = var_1170_begin_0, end = var_1170_end_0, end_mask = var_1170_end_mask_0, x = var_1103_cast_fp16)[name = tensor("op_1170_cast_fp16")]; tensor var_1171_begin_0 = const()[name = tensor("op_1171_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1171_end_0 = const()[name = tensor("op_1171_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1171_end_mask_0 = const()[name = tensor("op_1171_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1171_cast_fp16 = slice_by_index(begin = var_1171_begin_0, end = var_1171_end_0, end_mask = var_1171_end_mask_0, x = var_1103_cast_fp16)[name = tensor("op_1171_cast_fp16")]; tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = var_1103_cast_fp16)[name = tensor("op_1172_cast_fp16")]; tensor var_1173_begin_0 = const()[name = tensor("op_1173_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1173_end_0 = const()[name = tensor("op_1173_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1173_end_mask_0 = const()[name = tensor("op_1173_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1173_cast_fp16 = slice_by_index(begin = var_1173_begin_0, end = var_1173_end_0, end_mask = var_1173_end_mask_0, x = var_1103_cast_fp16)[name = tensor("op_1173_cast_fp16")]; tensor var_1174_begin_0 = const()[name = tensor("op_1174_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1174_end_0 = const()[name = tensor("op_1174_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1174_end_mask_0 = const()[name = tensor("op_1174_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1174_cast_fp16 = slice_by_index(begin = var_1174_begin_0, end = var_1174_end_0, end_mask = var_1174_end_mask_0, x = var_1107_cast_fp16)[name = tensor("op_1174_cast_fp16")]; tensor var_1175_begin_0 = const()[name = tensor("op_1175_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1175_end_0 = const()[name = tensor("op_1175_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1175_end_mask_0 = const()[name = tensor("op_1175_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1175_cast_fp16 = slice_by_index(begin = var_1175_begin_0, end = var_1175_end_0, end_mask = var_1175_end_mask_0, x = var_1107_cast_fp16)[name = tensor("op_1175_cast_fp16")]; tensor var_1176_begin_0 = const()[name = tensor("op_1176_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1176_end_0 = const()[name = tensor("op_1176_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1176_end_mask_0 = const()[name = tensor("op_1176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1176_cast_fp16 = slice_by_index(begin = var_1176_begin_0, end = var_1176_end_0, end_mask = var_1176_end_mask_0, x = var_1107_cast_fp16)[name = tensor("op_1176_cast_fp16")]; tensor var_1177_begin_0 = const()[name = tensor("op_1177_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1177_end_0 = const()[name = tensor("op_1177_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1177_end_mask_0 = const()[name = tensor("op_1177_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1177_cast_fp16 = slice_by_index(begin = var_1177_begin_0, end = var_1177_end_0, end_mask = var_1177_end_mask_0, x = var_1107_cast_fp16)[name = tensor("op_1177_cast_fp16")]; tensor var_1178_begin_0 = const()[name = tensor("op_1178_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1178_end_0 = const()[name = tensor("op_1178_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1178_end_mask_0 = const()[name = tensor("op_1178_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1178_cast_fp16 = slice_by_index(begin = var_1178_begin_0, end = var_1178_end_0, end_mask = var_1178_end_mask_0, x = var_1107_cast_fp16)[name = tensor("op_1178_cast_fp16")]; tensor var_1179_begin_0 = const()[name = tensor("op_1179_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1179_end_0 = const()[name = tensor("op_1179_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1179_end_mask_0 = const()[name = tensor("op_1179_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1179_cast_fp16 = slice_by_index(begin = var_1179_begin_0, end = var_1179_end_0, end_mask = var_1179_end_mask_0, x = var_1107_cast_fp16)[name = tensor("op_1179_cast_fp16")]; tensor var_1180_begin_0 = const()[name = tensor("op_1180_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1180_end_0 = const()[name = tensor("op_1180_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1180_end_mask_0 = const()[name = tensor("op_1180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1180_cast_fp16 = slice_by_index(begin = var_1180_begin_0, end = var_1180_end_0, end_mask = var_1180_end_mask_0, x = var_1111_cast_fp16)[name = tensor("op_1180_cast_fp16")]; tensor var_1181_begin_0 = const()[name = tensor("op_1181_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1181_end_0 = const()[name = tensor("op_1181_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1181_end_mask_0 = const()[name = tensor("op_1181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1181_cast_fp16 = slice_by_index(begin = var_1181_begin_0, end = var_1181_end_0, end_mask = var_1181_end_mask_0, x = var_1111_cast_fp16)[name = tensor("op_1181_cast_fp16")]; tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = var_1111_cast_fp16)[name = tensor("op_1182_cast_fp16")]; tensor var_1183_begin_0 = const()[name = tensor("op_1183_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1183_end_0 = const()[name = tensor("op_1183_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1183_end_mask_0 = const()[name = tensor("op_1183_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1183_cast_fp16 = slice_by_index(begin = var_1183_begin_0, end = var_1183_end_0, end_mask = var_1183_end_mask_0, x = var_1111_cast_fp16)[name = tensor("op_1183_cast_fp16")]; tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = var_1111_cast_fp16)[name = tensor("op_1184_cast_fp16")]; tensor var_1185_begin_0 = const()[name = tensor("op_1185_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1185_end_0 = const()[name = tensor("op_1185_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1185_end_mask_0 = const()[name = tensor("op_1185_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1185_cast_fp16 = slice_by_index(begin = var_1185_begin_0, end = var_1185_end_0, end_mask = var_1185_end_mask_0, x = var_1111_cast_fp16)[name = tensor("op_1185_cast_fp16")]; tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_10")]; tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1190_cast_fp16")]; tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1194_cast_fp16")]; tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1198_cast_fp16")]; tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1202_cast_fp16")]; tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1206_cast_fp16")]; tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1210_cast_fp16")]; tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1214_cast_fp16")]; tensor var_1218_begin_0 = const()[name = tensor("op_1218_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_1218_end_0 = const()[name = tensor("op_1218_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_1218_end_mask_0 = const()[name = tensor("op_1218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1218_cast_fp16 = slice_by_index(begin = var_1218_begin_0, end = var_1218_end_0, end_mask = var_1218_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1218_cast_fp16")]; tensor var_1222_begin_0 = const()[name = tensor("op_1222_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1222_end_0 = const()[name = tensor("op_1222_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_1222_end_mask_0 = const()[name = tensor("op_1222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1222_cast_fp16 = slice_by_index(begin = var_1222_begin_0, end = var_1222_end_0, end_mask = var_1222_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1222_cast_fp16")]; tensor var_1226_begin_0 = const()[name = tensor("op_1226_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_1226_end_0 = const()[name = tensor("op_1226_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_1226_end_mask_0 = const()[name = tensor("op_1226_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1226_cast_fp16 = slice_by_index(begin = var_1226_begin_0, end = var_1226_end_0, end_mask = var_1226_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1226_cast_fp16")]; tensor var_1230_begin_0 = const()[name = tensor("op_1230_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_1230_end_0 = const()[name = tensor("op_1230_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_1230_end_mask_0 = const()[name = tensor("op_1230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1230_cast_fp16 = slice_by_index(begin = var_1230_begin_0, end = var_1230_end_0, end_mask = var_1230_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1230_cast_fp16")]; tensor var_1234_begin_0 = const()[name = tensor("op_1234_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_1234_end_0 = const()[name = tensor("op_1234_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_1234_end_mask_0 = const()[name = tensor("op_1234_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1234_cast_fp16 = slice_by_index(begin = var_1234_begin_0, end = var_1234_end_0, end_mask = var_1234_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_1234_cast_fp16")]; tensor var_1236_begin_0 = const()[name = tensor("op_1236_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1236_end_0 = const()[name = tensor("op_1236_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1236_end_mask_0 = const()[name = tensor("op_1236_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1236_cast_fp16 = slice_by_index(begin = var_1236_begin_0, end = var_1236_end_0, end_mask = var_1236_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1236_cast_fp16")]; tensor var_1240_begin_0 = const()[name = tensor("op_1240_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1240_end_0 = const()[name = tensor("op_1240_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1240_end_mask_0 = const()[name = tensor("op_1240_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1240_cast_fp16 = slice_by_index(begin = var_1240_begin_0, end = var_1240_end_0, end_mask = var_1240_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1240_cast_fp16")]; tensor var_1244_begin_0 = const()[name = tensor("op_1244_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1244_end_0 = const()[name = tensor("op_1244_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1244_end_mask_0 = const()[name = tensor("op_1244_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1244_cast_fp16 = slice_by_index(begin = var_1244_begin_0, end = var_1244_end_0, end_mask = var_1244_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1244_cast_fp16")]; tensor var_1248_begin_0 = const()[name = tensor("op_1248_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1248_end_0 = const()[name = tensor("op_1248_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1248_end_mask_0 = const()[name = tensor("op_1248_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1248_cast_fp16 = slice_by_index(begin = var_1248_begin_0, end = var_1248_end_0, end_mask = var_1248_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1248_cast_fp16")]; tensor var_1252_begin_0 = const()[name = tensor("op_1252_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1252_end_0 = const()[name = tensor("op_1252_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1252_end_mask_0 = const()[name = tensor("op_1252_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1252_cast_fp16 = slice_by_index(begin = var_1252_begin_0, end = var_1252_end_0, end_mask = var_1252_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1252_cast_fp16")]; tensor var_1256_begin_0 = const()[name = tensor("op_1256_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1256_end_0 = const()[name = tensor("op_1256_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1256_end_mask_0 = const()[name = tensor("op_1256_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1256_cast_fp16 = slice_by_index(begin = var_1256_begin_0, end = var_1256_end_0, end_mask = var_1256_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1256_cast_fp16")]; tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1260_cast_fp16")]; tensor var_1264_begin_0 = const()[name = tensor("op_1264_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1264_end_0 = const()[name = tensor("op_1264_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_1264_end_mask_0 = const()[name = tensor("op_1264_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1264_cast_fp16")]; tensor var_1268_begin_0 = const()[name = tensor("op_1268_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_1268_end_0 = const()[name = tensor("op_1268_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_1268_end_mask_0 = const()[name = tensor("op_1268_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1268_cast_fp16")]; tensor var_1272_begin_0 = const()[name = tensor("op_1272_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_1272_end_0 = const()[name = tensor("op_1272_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_1272_end_mask_0 = const()[name = tensor("op_1272_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1272_cast_fp16 = slice_by_index(begin = var_1272_begin_0, end = var_1272_end_0, end_mask = var_1272_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1272_cast_fp16")]; tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1276_cast_fp16")]; tensor var_1280_begin_0 = const()[name = tensor("op_1280_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_1280_end_0 = const()[name = tensor("op_1280_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1280_end_mask_0 = const()[name = tensor("op_1280_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1280_cast_fp16 = slice_by_index(begin = var_1280_begin_0, end = var_1280_end_0, end_mask = var_1280_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_1280_cast_fp16")]; tensor _SplitHeadsQ__mh_w_145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_145_equation_0, values = (var_1190_cast_fp16, var_1114_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_147_equation_0, values = (var_1190_cast_fp16, var_1115_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_149_equation_0, values = (var_1190_cast_fp16, var_1116_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_151_equation_0, values = (var_1190_cast_fp16, var_1117_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_153_equation_0, values = (var_1190_cast_fp16, var_1118_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_155_equation_0, values = (var_1190_cast_fp16, var_1119_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_157_equation_0, values = (var_1194_cast_fp16, var_1120_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_159_equation_0, values = (var_1194_cast_fp16, var_1121_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_161_equation_0, values = (var_1194_cast_fp16, var_1122_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_163_equation_0, values = (var_1194_cast_fp16, var_1123_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_165_equation_0, values = (var_1194_cast_fp16, var_1124_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_167_equation_0, values = (var_1194_cast_fp16, var_1125_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_169_equation_0, values = (var_1198_cast_fp16, var_1126_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_171_equation_0, values = (var_1198_cast_fp16, var_1127_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_173_equation_0, values = (var_1198_cast_fp16, var_1128_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_175_equation_0, values = (var_1198_cast_fp16, var_1129_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_177_equation_0, values = (var_1198_cast_fp16, var_1130_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_179_equation_0, values = (var_1198_cast_fp16, var_1131_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_181_equation_0, values = (var_1202_cast_fp16, var_1132_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_183_equation_0, values = (var_1202_cast_fp16, var_1133_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_185_equation_0, values = (var_1202_cast_fp16, var_1134_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_187_equation_0, values = (var_1202_cast_fp16, var_1135_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_189_equation_0, values = (var_1202_cast_fp16, var_1136_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_191_equation_0, values = (var_1202_cast_fp16, var_1137_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_193_equation_0, values = (var_1206_cast_fp16, var_1138_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_195_equation_0, values = (var_1206_cast_fp16, var_1139_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_197_equation_0, values = (var_1206_cast_fp16, var_1140_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_199_equation_0, values = (var_1206_cast_fp16, var_1141_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_201_equation_0, values = (var_1206_cast_fp16, var_1142_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_203_equation_0, values = (var_1206_cast_fp16, var_1143_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_205_equation_0, values = (var_1210_cast_fp16, var_1144_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_207_equation_0, values = (var_1210_cast_fp16, var_1145_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_209_equation_0, values = (var_1210_cast_fp16, var_1146_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_211_equation_0, values = (var_1210_cast_fp16, var_1147_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_213_equation_0, values = (var_1210_cast_fp16, var_1148_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_215_equation_0, values = (var_1210_cast_fp16, var_1149_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_217_equation_0, values = (var_1214_cast_fp16, var_1150_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_219_equation_0, values = (var_1214_cast_fp16, var_1151_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_221_equation_0, values = (var_1214_cast_fp16, var_1152_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_223_equation_0, values = (var_1214_cast_fp16, var_1153_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_225_equation_0, values = (var_1214_cast_fp16, var_1154_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_227_equation_0, values = (var_1214_cast_fp16, var_1155_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_229_equation_0, values = (var_1218_cast_fp16, var_1156_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_231_equation_0, values = (var_1218_cast_fp16, var_1157_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_233_equation_0, values = (var_1218_cast_fp16, var_1158_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_235_equation_0, values = (var_1218_cast_fp16, var_1159_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_237_equation_0, values = (var_1218_cast_fp16, var_1160_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_239_equation_0, values = (var_1218_cast_fp16, var_1161_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_241_equation_0, values = (var_1222_cast_fp16, var_1162_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_243_equation_0, values = (var_1222_cast_fp16, var_1163_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_245_equation_0, values = (var_1222_cast_fp16, var_1164_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_247_equation_0, values = (var_1222_cast_fp16, var_1165_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_249_equation_0, values = (var_1222_cast_fp16, var_1166_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_251_equation_0, values = (var_1222_cast_fp16, var_1167_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_253_equation_0, values = (var_1226_cast_fp16, var_1168_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_255_equation_0, values = (var_1226_cast_fp16, var_1169_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_257_equation_0, values = (var_1226_cast_fp16, var_1170_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_259_equation_0, values = (var_1226_cast_fp16, var_1171_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_261_equation_0, values = (var_1226_cast_fp16, var_1172_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_263_equation_0, values = (var_1226_cast_fp16, var_1173_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_265_equation_0, values = (var_1230_cast_fp16, var_1174_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_267_equation_0, values = (var_1230_cast_fp16, var_1175_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_269_equation_0, values = (var_1230_cast_fp16, var_1176_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_271_equation_0, values = (var_1230_cast_fp16, var_1177_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_273_equation_0, values = (var_1230_cast_fp16, var_1178_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_275_equation_0, values = (var_1230_cast_fp16, var_1179_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_277_equation_0, values = (var_1234_cast_fp16, var_1180_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_279_equation_0, values = (var_1234_cast_fp16, var_1181_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_281_equation_0, values = (var_1234_cast_fp16, var_1182_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_283_equation_0, values = (var_1234_cast_fp16, var_1183_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_285_equation_0, values = (var_1234_cast_fp16, var_1184_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_287_equation_0, values = (var_1234_cast_fp16, var_1185_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_287_cast_fp16")]; tensor var_1427_to_fp16 = const()[name = tensor("op_1427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_145_cast_fp16, y = var_1427_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; tensor var_1429_to_fp16 = const()[name = tensor("op_1429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_147_cast_fp16, y = var_1429_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; tensor var_1431_to_fp16 = const()[name = tensor("op_1431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_149_cast_fp16, y = var_1431_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; tensor var_1433_to_fp16 = const()[name = tensor("op_1433_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_151_cast_fp16, y = var_1433_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; tensor var_1435_to_fp16 = const()[name = tensor("op_1435_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_153_cast_fp16, y = var_1435_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; tensor var_1437_to_fp16 = const()[name = tensor("op_1437_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_155_cast_fp16, y = var_1437_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; tensor var_1439_to_fp16 = const()[name = tensor("op_1439_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_157_cast_fp16, y = var_1439_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; tensor var_1441_to_fp16 = const()[name = tensor("op_1441_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_159_cast_fp16, y = var_1441_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; tensor var_1443_to_fp16 = const()[name = tensor("op_1443_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_161_cast_fp16, y = var_1443_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; tensor var_1445_to_fp16 = const()[name = tensor("op_1445_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_163_cast_fp16, y = var_1445_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; tensor var_1447_to_fp16 = const()[name = tensor("op_1447_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_165_cast_fp16, y = var_1447_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; tensor var_1449_to_fp16 = const()[name = tensor("op_1449_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_167_cast_fp16, y = var_1449_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; tensor var_1451_to_fp16 = const()[name = tensor("op_1451_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_169_cast_fp16, y = var_1451_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; tensor var_1453_to_fp16 = const()[name = tensor("op_1453_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_171_cast_fp16, y = var_1453_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; tensor var_1455_to_fp16 = const()[name = tensor("op_1455_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_173_cast_fp16, y = var_1455_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; tensor var_1457_to_fp16 = const()[name = tensor("op_1457_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_175_cast_fp16, y = var_1457_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; tensor var_1459_to_fp16 = const()[name = tensor("op_1459_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_177_cast_fp16, y = var_1459_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; tensor var_1461_to_fp16 = const()[name = tensor("op_1461_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_179_cast_fp16, y = var_1461_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; tensor var_1463_to_fp16 = const()[name = tensor("op_1463_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_181_cast_fp16, y = var_1463_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; tensor var_1465_to_fp16 = const()[name = tensor("op_1465_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_183_cast_fp16, y = var_1465_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; tensor var_1467_to_fp16 = const()[name = tensor("op_1467_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_185_cast_fp16, y = var_1467_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; tensor var_1469_to_fp16 = const()[name = tensor("op_1469_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_187_cast_fp16, y = var_1469_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; tensor var_1471_to_fp16 = const()[name = tensor("op_1471_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_189_cast_fp16, y = var_1471_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; tensor var_1473_to_fp16 = const()[name = tensor("op_1473_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_191_cast_fp16, y = var_1473_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; tensor var_1475_to_fp16 = const()[name = tensor("op_1475_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_193_cast_fp16, y = var_1475_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; tensor var_1477_to_fp16 = const()[name = tensor("op_1477_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_195_cast_fp16, y = var_1477_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; tensor var_1479_to_fp16 = const()[name = tensor("op_1479_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_197_cast_fp16, y = var_1479_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; tensor var_1481_to_fp16 = const()[name = tensor("op_1481_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_199_cast_fp16, y = var_1481_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; tensor var_1483_to_fp16 = const()[name = tensor("op_1483_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_201_cast_fp16, y = var_1483_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; tensor var_1485_to_fp16 = const()[name = tensor("op_1485_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_203_cast_fp16, y = var_1485_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; tensor var_1487_to_fp16 = const()[name = tensor("op_1487_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_205_cast_fp16, y = var_1487_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; tensor var_1489_to_fp16 = const()[name = tensor("op_1489_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_207_cast_fp16, y = var_1489_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; tensor var_1491_to_fp16 = const()[name = tensor("op_1491_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_209_cast_fp16, y = var_1491_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; tensor var_1493_to_fp16 = const()[name = tensor("op_1493_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_211_cast_fp16, y = var_1493_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; tensor var_1495_to_fp16 = const()[name = tensor("op_1495_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_213_cast_fp16, y = var_1495_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; tensor var_1497_to_fp16 = const()[name = tensor("op_1497_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_215_cast_fp16, y = var_1497_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; tensor var_1499_to_fp16 = const()[name = tensor("op_1499_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_217_cast_fp16, y = var_1499_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; tensor var_1501_to_fp16 = const()[name = tensor("op_1501_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_219_cast_fp16, y = var_1501_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; tensor var_1503_to_fp16 = const()[name = tensor("op_1503_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_221_cast_fp16, y = var_1503_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; tensor var_1505_to_fp16 = const()[name = tensor("op_1505_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_223_cast_fp16, y = var_1505_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; tensor var_1507_to_fp16 = const()[name = tensor("op_1507_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_225_cast_fp16, y = var_1507_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; tensor var_1509_to_fp16 = const()[name = tensor("op_1509_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_227_cast_fp16, y = var_1509_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; tensor var_1511_to_fp16 = const()[name = tensor("op_1511_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_229_cast_fp16, y = var_1511_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; tensor var_1513_to_fp16 = const()[name = tensor("op_1513_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_231_cast_fp16, y = var_1513_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_233_cast_fp16, y = var_1515_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; tensor var_1517_to_fp16 = const()[name = tensor("op_1517_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_235_cast_fp16, y = var_1517_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_237_cast_fp16, y = var_1519_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; tensor var_1521_to_fp16 = const()[name = tensor("op_1521_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_239_cast_fp16, y = var_1521_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; tensor var_1523_to_fp16 = const()[name = tensor("op_1523_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_241_cast_fp16, y = var_1523_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; tensor var_1525_to_fp16 = const()[name = tensor("op_1525_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_243_cast_fp16, y = var_1525_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; tensor var_1527_to_fp16 = const()[name = tensor("op_1527_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_245_cast_fp16, y = var_1527_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; tensor var_1529_to_fp16 = const()[name = tensor("op_1529_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_247_cast_fp16, y = var_1529_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_249_cast_fp16, y = var_1531_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; tensor var_1533_to_fp16 = const()[name = tensor("op_1533_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_251_cast_fp16, y = var_1533_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; tensor var_1535_to_fp16 = const()[name = tensor("op_1535_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_253_cast_fp16, y = var_1535_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; tensor var_1537_to_fp16 = const()[name = tensor("op_1537_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_255_cast_fp16, y = var_1537_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; tensor var_1539_to_fp16 = const()[name = tensor("op_1539_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_257_cast_fp16, y = var_1539_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; tensor var_1541_to_fp16 = const()[name = tensor("op_1541_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_259_cast_fp16, y = var_1541_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; tensor var_1543_to_fp16 = const()[name = tensor("op_1543_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_261_cast_fp16, y = var_1543_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; tensor var_1545_to_fp16 = const()[name = tensor("op_1545_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_263_cast_fp16, y = var_1545_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_265_cast_fp16, y = var_1547_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; tensor var_1549_to_fp16 = const()[name = tensor("op_1549_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_267_cast_fp16, y = var_1549_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; tensor var_1551_to_fp16 = const()[name = tensor("op_1551_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_269_cast_fp16, y = var_1551_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; tensor var_1553_to_fp16 = const()[name = tensor("op_1553_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_271_cast_fp16, y = var_1553_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; tensor var_1555_to_fp16 = const()[name = tensor("op_1555_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_273_cast_fp16, y = var_1555_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; tensor var_1557_to_fp16 = const()[name = tensor("op_1557_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_275_cast_fp16, y = var_1557_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; tensor var_1559_to_fp16 = const()[name = tensor("op_1559_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_277_cast_fp16, y = var_1559_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; tensor var_1561_to_fp16 = const()[name = tensor("op_1561_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_279_cast_fp16, y = var_1561_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; tensor var_1563_to_fp16 = const()[name = tensor("op_1563_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_281_cast_fp16, y = var_1563_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; tensor var_1565_to_fp16 = const()[name = tensor("op_1565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_283_cast_fp16, y = var_1565_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_285_cast_fp16, y = var_1567_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; tensor var_1569_to_fp16 = const()[name = tensor("op_1569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_287_cast_fp16, y = var_1569_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; tensor var_1571_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_145_cast_fp16)[name = tensor("op_1571_cast_fp16")]; tensor var_1572_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_147_cast_fp16)[name = tensor("op_1572_cast_fp16")]; tensor var_1573_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_149_cast_fp16)[name = tensor("op_1573_cast_fp16")]; tensor var_1574_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_151_cast_fp16)[name = tensor("op_1574_cast_fp16")]; tensor var_1575_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_153_cast_fp16)[name = tensor("op_1575_cast_fp16")]; tensor var_1576_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_155_cast_fp16)[name = tensor("op_1576_cast_fp16")]; tensor var_1577_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_157_cast_fp16)[name = tensor("op_1577_cast_fp16")]; tensor var_1578_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_159_cast_fp16)[name = tensor("op_1578_cast_fp16")]; tensor var_1579_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_161_cast_fp16)[name = tensor("op_1579_cast_fp16")]; tensor var_1580_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_163_cast_fp16)[name = tensor("op_1580_cast_fp16")]; tensor var_1581_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_165_cast_fp16)[name = tensor("op_1581_cast_fp16")]; tensor var_1582_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_167_cast_fp16)[name = tensor("op_1582_cast_fp16")]; tensor var_1583_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_169_cast_fp16)[name = tensor("op_1583_cast_fp16")]; tensor var_1584_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_171_cast_fp16)[name = tensor("op_1584_cast_fp16")]; tensor var_1585_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_173_cast_fp16)[name = tensor("op_1585_cast_fp16")]; tensor var_1586_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_175_cast_fp16)[name = tensor("op_1586_cast_fp16")]; tensor var_1587_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_177_cast_fp16)[name = tensor("op_1587_cast_fp16")]; tensor var_1588_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_179_cast_fp16)[name = tensor("op_1588_cast_fp16")]; tensor var_1589_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_181_cast_fp16)[name = tensor("op_1589_cast_fp16")]; tensor var_1590_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_183_cast_fp16)[name = tensor("op_1590_cast_fp16")]; tensor var_1591_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_185_cast_fp16)[name = tensor("op_1591_cast_fp16")]; tensor var_1592_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_187_cast_fp16)[name = tensor("op_1592_cast_fp16")]; tensor var_1593_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_189_cast_fp16)[name = tensor("op_1593_cast_fp16")]; tensor var_1594_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_191_cast_fp16)[name = tensor("op_1594_cast_fp16")]; tensor var_1595_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_193_cast_fp16)[name = tensor("op_1595_cast_fp16")]; tensor var_1596_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_195_cast_fp16)[name = tensor("op_1596_cast_fp16")]; tensor var_1597_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_197_cast_fp16)[name = tensor("op_1597_cast_fp16")]; tensor var_1598_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_199_cast_fp16)[name = tensor("op_1598_cast_fp16")]; tensor var_1599_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_201_cast_fp16)[name = tensor("op_1599_cast_fp16")]; tensor var_1600_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_203_cast_fp16)[name = tensor("op_1600_cast_fp16")]; tensor var_1601_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_205_cast_fp16)[name = tensor("op_1601_cast_fp16")]; tensor var_1602_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_207_cast_fp16)[name = tensor("op_1602_cast_fp16")]; tensor var_1603_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_209_cast_fp16)[name = tensor("op_1603_cast_fp16")]; tensor var_1604_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_211_cast_fp16)[name = tensor("op_1604_cast_fp16")]; tensor var_1605_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_213_cast_fp16)[name = tensor("op_1605_cast_fp16")]; tensor var_1606_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_215_cast_fp16)[name = tensor("op_1606_cast_fp16")]; tensor var_1607_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_217_cast_fp16)[name = tensor("op_1607_cast_fp16")]; tensor var_1608_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_219_cast_fp16)[name = tensor("op_1608_cast_fp16")]; tensor var_1609_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_221_cast_fp16)[name = tensor("op_1609_cast_fp16")]; tensor var_1610_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_223_cast_fp16)[name = tensor("op_1610_cast_fp16")]; tensor var_1611_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_225_cast_fp16)[name = tensor("op_1611_cast_fp16")]; tensor var_1612_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_227_cast_fp16)[name = tensor("op_1612_cast_fp16")]; tensor var_1613_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_229_cast_fp16)[name = tensor("op_1613_cast_fp16")]; tensor var_1614_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_231_cast_fp16)[name = tensor("op_1614_cast_fp16")]; tensor var_1615_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_233_cast_fp16)[name = tensor("op_1615_cast_fp16")]; tensor var_1616_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_235_cast_fp16)[name = tensor("op_1616_cast_fp16")]; tensor var_1617_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_237_cast_fp16)[name = tensor("op_1617_cast_fp16")]; tensor var_1618_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_239_cast_fp16)[name = tensor("op_1618_cast_fp16")]; tensor var_1619_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_241_cast_fp16)[name = tensor("op_1619_cast_fp16")]; tensor var_1620_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_243_cast_fp16)[name = tensor("op_1620_cast_fp16")]; tensor var_1621_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_245_cast_fp16)[name = tensor("op_1621_cast_fp16")]; tensor var_1622_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_247_cast_fp16)[name = tensor("op_1622_cast_fp16")]; tensor var_1623_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_249_cast_fp16)[name = tensor("op_1623_cast_fp16")]; tensor var_1624_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_251_cast_fp16)[name = tensor("op_1624_cast_fp16")]; tensor var_1625_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_253_cast_fp16)[name = tensor("op_1625_cast_fp16")]; tensor var_1626_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_255_cast_fp16)[name = tensor("op_1626_cast_fp16")]; tensor var_1627_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_257_cast_fp16)[name = tensor("op_1627_cast_fp16")]; tensor var_1628_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_259_cast_fp16)[name = tensor("op_1628_cast_fp16")]; tensor var_1629_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_261_cast_fp16)[name = tensor("op_1629_cast_fp16")]; tensor var_1630_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_263_cast_fp16)[name = tensor("op_1630_cast_fp16")]; tensor var_1631_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_265_cast_fp16)[name = tensor("op_1631_cast_fp16")]; tensor var_1632_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_267_cast_fp16)[name = tensor("op_1632_cast_fp16")]; tensor var_1633_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_269_cast_fp16)[name = tensor("op_1633_cast_fp16")]; tensor var_1634_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_271_cast_fp16)[name = tensor("op_1634_cast_fp16")]; tensor var_1635_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_273_cast_fp16)[name = tensor("op_1635_cast_fp16")]; tensor var_1636_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_275_cast_fp16)[name = tensor("op_1636_cast_fp16")]; tensor var_1637_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_277_cast_fp16)[name = tensor("op_1637_cast_fp16")]; tensor var_1638_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_279_cast_fp16)[name = tensor("op_1638_cast_fp16")]; tensor var_1639_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_281_cast_fp16)[name = tensor("op_1639_cast_fp16")]; tensor var_1640_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_283_cast_fp16)[name = tensor("op_1640_cast_fp16")]; tensor var_1641_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_285_cast_fp16)[name = tensor("op_1641_cast_fp16")]; tensor var_1642_cast_fp16 = softmax(axis = var_1015, x = aw_chunk_287_cast_fp16)[name = tensor("op_1642_cast_fp16")]; tensor var_1644_equation_0 = const()[name = tensor("op_1644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1644_cast_fp16 = einsum(equation = var_1644_equation_0, values = (var_1236_cast_fp16, var_1571_cast_fp16))[name = tensor("op_1644_cast_fp16")]; tensor var_1646_equation_0 = const()[name = tensor("op_1646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1236_cast_fp16, var_1572_cast_fp16))[name = tensor("op_1646_cast_fp16")]; tensor var_1648_equation_0 = const()[name = tensor("op_1648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1236_cast_fp16, var_1573_cast_fp16))[name = tensor("op_1648_cast_fp16")]; tensor var_1650_equation_0 = const()[name = tensor("op_1650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1236_cast_fp16, var_1574_cast_fp16))[name = tensor("op_1650_cast_fp16")]; tensor var_1652_equation_0 = const()[name = tensor("op_1652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1236_cast_fp16, var_1575_cast_fp16))[name = tensor("op_1652_cast_fp16")]; tensor var_1654_equation_0 = const()[name = tensor("op_1654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1236_cast_fp16, var_1576_cast_fp16))[name = tensor("op_1654_cast_fp16")]; tensor var_1656_equation_0 = const()[name = tensor("op_1656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1240_cast_fp16, var_1577_cast_fp16))[name = tensor("op_1656_cast_fp16")]; tensor var_1658_equation_0 = const()[name = tensor("op_1658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1240_cast_fp16, var_1578_cast_fp16))[name = tensor("op_1658_cast_fp16")]; tensor var_1660_equation_0 = const()[name = tensor("op_1660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1240_cast_fp16, var_1579_cast_fp16))[name = tensor("op_1660_cast_fp16")]; tensor var_1662_equation_0 = const()[name = tensor("op_1662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1240_cast_fp16, var_1580_cast_fp16))[name = tensor("op_1662_cast_fp16")]; tensor var_1664_equation_0 = const()[name = tensor("op_1664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1240_cast_fp16, var_1581_cast_fp16))[name = tensor("op_1664_cast_fp16")]; tensor var_1666_equation_0 = const()[name = tensor("op_1666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1240_cast_fp16, var_1582_cast_fp16))[name = tensor("op_1666_cast_fp16")]; tensor var_1668_equation_0 = const()[name = tensor("op_1668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1244_cast_fp16, var_1583_cast_fp16))[name = tensor("op_1668_cast_fp16")]; tensor var_1670_equation_0 = const()[name = tensor("op_1670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1244_cast_fp16, var_1584_cast_fp16))[name = tensor("op_1670_cast_fp16")]; tensor var_1672_equation_0 = const()[name = tensor("op_1672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1672_cast_fp16 = einsum(equation = var_1672_equation_0, values = (var_1244_cast_fp16, var_1585_cast_fp16))[name = tensor("op_1672_cast_fp16")]; tensor var_1674_equation_0 = const()[name = tensor("op_1674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1244_cast_fp16, var_1586_cast_fp16))[name = tensor("op_1674_cast_fp16")]; tensor var_1676_equation_0 = const()[name = tensor("op_1676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1676_cast_fp16 = einsum(equation = var_1676_equation_0, values = (var_1244_cast_fp16, var_1587_cast_fp16))[name = tensor("op_1676_cast_fp16")]; tensor var_1678_equation_0 = const()[name = tensor("op_1678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1244_cast_fp16, var_1588_cast_fp16))[name = tensor("op_1678_cast_fp16")]; tensor var_1680_equation_0 = const()[name = tensor("op_1680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1680_cast_fp16 = einsum(equation = var_1680_equation_0, values = (var_1248_cast_fp16, var_1589_cast_fp16))[name = tensor("op_1680_cast_fp16")]; tensor var_1682_equation_0 = const()[name = tensor("op_1682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1248_cast_fp16, var_1590_cast_fp16))[name = tensor("op_1682_cast_fp16")]; tensor var_1684_equation_0 = const()[name = tensor("op_1684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1684_cast_fp16 = einsum(equation = var_1684_equation_0, values = (var_1248_cast_fp16, var_1591_cast_fp16))[name = tensor("op_1684_cast_fp16")]; tensor var_1686_equation_0 = const()[name = tensor("op_1686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1686_cast_fp16 = einsum(equation = var_1686_equation_0, values = (var_1248_cast_fp16, var_1592_cast_fp16))[name = tensor("op_1686_cast_fp16")]; tensor var_1688_equation_0 = const()[name = tensor("op_1688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1688_cast_fp16 = einsum(equation = var_1688_equation_0, values = (var_1248_cast_fp16, var_1593_cast_fp16))[name = tensor("op_1688_cast_fp16")]; tensor var_1690_equation_0 = const()[name = tensor("op_1690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1690_cast_fp16 = einsum(equation = var_1690_equation_0, values = (var_1248_cast_fp16, var_1594_cast_fp16))[name = tensor("op_1690_cast_fp16")]; tensor var_1692_equation_0 = const()[name = tensor("op_1692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1692_cast_fp16 = einsum(equation = var_1692_equation_0, values = (var_1252_cast_fp16, var_1595_cast_fp16))[name = tensor("op_1692_cast_fp16")]; tensor var_1694_equation_0 = const()[name = tensor("op_1694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1694_cast_fp16 = einsum(equation = var_1694_equation_0, values = (var_1252_cast_fp16, var_1596_cast_fp16))[name = tensor("op_1694_cast_fp16")]; tensor var_1696_equation_0 = const()[name = tensor("op_1696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1696_cast_fp16 = einsum(equation = var_1696_equation_0, values = (var_1252_cast_fp16, var_1597_cast_fp16))[name = tensor("op_1696_cast_fp16")]; tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1252_cast_fp16, var_1598_cast_fp16))[name = tensor("op_1698_cast_fp16")]; tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1252_cast_fp16, var_1599_cast_fp16))[name = tensor("op_1700_cast_fp16")]; tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1252_cast_fp16, var_1600_cast_fp16))[name = tensor("op_1702_cast_fp16")]; tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1256_cast_fp16, var_1601_cast_fp16))[name = tensor("op_1704_cast_fp16")]; tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1256_cast_fp16, var_1602_cast_fp16))[name = tensor("op_1706_cast_fp16")]; tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1256_cast_fp16, var_1603_cast_fp16))[name = tensor("op_1708_cast_fp16")]; tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1256_cast_fp16, var_1604_cast_fp16))[name = tensor("op_1710_cast_fp16")]; tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1256_cast_fp16, var_1605_cast_fp16))[name = tensor("op_1712_cast_fp16")]; tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1256_cast_fp16, var_1606_cast_fp16))[name = tensor("op_1714_cast_fp16")]; tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1260_cast_fp16, var_1607_cast_fp16))[name = tensor("op_1716_cast_fp16")]; tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1260_cast_fp16, var_1608_cast_fp16))[name = tensor("op_1718_cast_fp16")]; tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1260_cast_fp16, var_1609_cast_fp16))[name = tensor("op_1720_cast_fp16")]; tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1260_cast_fp16, var_1610_cast_fp16))[name = tensor("op_1722_cast_fp16")]; tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1260_cast_fp16, var_1611_cast_fp16))[name = tensor("op_1724_cast_fp16")]; tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1260_cast_fp16, var_1612_cast_fp16))[name = tensor("op_1726_cast_fp16")]; tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1264_cast_fp16, var_1613_cast_fp16))[name = tensor("op_1728_cast_fp16")]; tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1264_cast_fp16, var_1614_cast_fp16))[name = tensor("op_1730_cast_fp16")]; tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1264_cast_fp16, var_1615_cast_fp16))[name = tensor("op_1732_cast_fp16")]; tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1264_cast_fp16, var_1616_cast_fp16))[name = tensor("op_1734_cast_fp16")]; tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1264_cast_fp16, var_1617_cast_fp16))[name = tensor("op_1736_cast_fp16")]; tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1264_cast_fp16, var_1618_cast_fp16))[name = tensor("op_1738_cast_fp16")]; tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1268_cast_fp16, var_1619_cast_fp16))[name = tensor("op_1740_cast_fp16")]; tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1268_cast_fp16, var_1620_cast_fp16))[name = tensor("op_1742_cast_fp16")]; tensor var_1744_equation_0 = const()[name = tensor("op_1744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1744_cast_fp16 = einsum(equation = var_1744_equation_0, values = (var_1268_cast_fp16, var_1621_cast_fp16))[name = tensor("op_1744_cast_fp16")]; tensor var_1746_equation_0 = const()[name = tensor("op_1746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1746_cast_fp16 = einsum(equation = var_1746_equation_0, values = (var_1268_cast_fp16, var_1622_cast_fp16))[name = tensor("op_1746_cast_fp16")]; tensor var_1748_equation_0 = const()[name = tensor("op_1748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1748_cast_fp16 = einsum(equation = var_1748_equation_0, values = (var_1268_cast_fp16, var_1623_cast_fp16))[name = tensor("op_1748_cast_fp16")]; tensor var_1750_equation_0 = const()[name = tensor("op_1750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1750_cast_fp16 = einsum(equation = var_1750_equation_0, values = (var_1268_cast_fp16, var_1624_cast_fp16))[name = tensor("op_1750_cast_fp16")]; tensor var_1752_equation_0 = const()[name = tensor("op_1752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1752_cast_fp16 = einsum(equation = var_1752_equation_0, values = (var_1272_cast_fp16, var_1625_cast_fp16))[name = tensor("op_1752_cast_fp16")]; tensor var_1754_equation_0 = const()[name = tensor("op_1754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1754_cast_fp16 = einsum(equation = var_1754_equation_0, values = (var_1272_cast_fp16, var_1626_cast_fp16))[name = tensor("op_1754_cast_fp16")]; tensor var_1756_equation_0 = const()[name = tensor("op_1756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1756_cast_fp16 = einsum(equation = var_1756_equation_0, values = (var_1272_cast_fp16, var_1627_cast_fp16))[name = tensor("op_1756_cast_fp16")]; tensor var_1758_equation_0 = const()[name = tensor("op_1758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1758_cast_fp16 = einsum(equation = var_1758_equation_0, values = (var_1272_cast_fp16, var_1628_cast_fp16))[name = tensor("op_1758_cast_fp16")]; tensor var_1760_equation_0 = const()[name = tensor("op_1760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1760_cast_fp16 = einsum(equation = var_1760_equation_0, values = (var_1272_cast_fp16, var_1629_cast_fp16))[name = tensor("op_1760_cast_fp16")]; tensor var_1762_equation_0 = const()[name = tensor("op_1762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1762_cast_fp16 = einsum(equation = var_1762_equation_0, values = (var_1272_cast_fp16, var_1630_cast_fp16))[name = tensor("op_1762_cast_fp16")]; tensor var_1764_equation_0 = const()[name = tensor("op_1764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1764_cast_fp16 = einsum(equation = var_1764_equation_0, values = (var_1276_cast_fp16, var_1631_cast_fp16))[name = tensor("op_1764_cast_fp16")]; tensor var_1766_equation_0 = const()[name = tensor("op_1766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1766_cast_fp16 = einsum(equation = var_1766_equation_0, values = (var_1276_cast_fp16, var_1632_cast_fp16))[name = tensor("op_1766_cast_fp16")]; tensor var_1768_equation_0 = const()[name = tensor("op_1768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1768_cast_fp16 = einsum(equation = var_1768_equation_0, values = (var_1276_cast_fp16, var_1633_cast_fp16))[name = tensor("op_1768_cast_fp16")]; tensor var_1770_equation_0 = const()[name = tensor("op_1770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1770_cast_fp16 = einsum(equation = var_1770_equation_0, values = (var_1276_cast_fp16, var_1634_cast_fp16))[name = tensor("op_1770_cast_fp16")]; tensor var_1772_equation_0 = const()[name = tensor("op_1772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1772_cast_fp16 = einsum(equation = var_1772_equation_0, values = (var_1276_cast_fp16, var_1635_cast_fp16))[name = tensor("op_1772_cast_fp16")]; tensor var_1774_equation_0 = const()[name = tensor("op_1774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1774_cast_fp16 = einsum(equation = var_1774_equation_0, values = (var_1276_cast_fp16, var_1636_cast_fp16))[name = tensor("op_1774_cast_fp16")]; tensor var_1776_equation_0 = const()[name = tensor("op_1776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1776_cast_fp16 = einsum(equation = var_1776_equation_0, values = (var_1280_cast_fp16, var_1637_cast_fp16))[name = tensor("op_1776_cast_fp16")]; tensor var_1778_equation_0 = const()[name = tensor("op_1778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1778_cast_fp16 = einsum(equation = var_1778_equation_0, values = (var_1280_cast_fp16, var_1638_cast_fp16))[name = tensor("op_1778_cast_fp16")]; tensor var_1780_equation_0 = const()[name = tensor("op_1780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1780_cast_fp16 = einsum(equation = var_1780_equation_0, values = (var_1280_cast_fp16, var_1639_cast_fp16))[name = tensor("op_1780_cast_fp16")]; tensor var_1782_equation_0 = const()[name = tensor("op_1782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1782_cast_fp16 = einsum(equation = var_1782_equation_0, values = (var_1280_cast_fp16, var_1640_cast_fp16))[name = tensor("op_1782_cast_fp16")]; tensor var_1784_equation_0 = const()[name = tensor("op_1784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1784_cast_fp16 = einsum(equation = var_1784_equation_0, values = (var_1280_cast_fp16, var_1641_cast_fp16))[name = tensor("op_1784_cast_fp16")]; tensor var_1786_equation_0 = const()[name = tensor("op_1786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1786_cast_fp16 = einsum(equation = var_1786_equation_0, values = (var_1280_cast_fp16, var_1642_cast_fp16))[name = tensor("op_1786_cast_fp16")]; tensor var_1788_interleave_0 = const()[name = tensor("op_1788_interleave_0"), val = tensor(false)]; tensor var_1788_cast_fp16 = concat(axis = var_999, interleave = var_1788_interleave_0, values = (var_1644_cast_fp16, var_1646_cast_fp16, var_1648_cast_fp16, var_1650_cast_fp16, var_1652_cast_fp16, var_1654_cast_fp16))[name = tensor("op_1788_cast_fp16")]; tensor var_1790_interleave_0 = const()[name = tensor("op_1790_interleave_0"), val = tensor(false)]; tensor var_1790_cast_fp16 = concat(axis = var_999, interleave = var_1790_interleave_0, values = (var_1656_cast_fp16, var_1658_cast_fp16, var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1790_cast_fp16")]; tensor var_1792_interleave_0 = const()[name = tensor("op_1792_interleave_0"), val = tensor(false)]; tensor var_1792_cast_fp16 = concat(axis = var_999, interleave = var_1792_interleave_0, values = (var_1668_cast_fp16, var_1670_cast_fp16, var_1672_cast_fp16, var_1674_cast_fp16, var_1676_cast_fp16, var_1678_cast_fp16))[name = tensor("op_1792_cast_fp16")]; tensor var_1794_interleave_0 = const()[name = tensor("op_1794_interleave_0"), val = tensor(false)]; tensor var_1794_cast_fp16 = concat(axis = var_999, interleave = var_1794_interleave_0, values = (var_1680_cast_fp16, var_1682_cast_fp16, var_1684_cast_fp16, var_1686_cast_fp16, var_1688_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1794_cast_fp16")]; tensor var_1796_interleave_0 = const()[name = tensor("op_1796_interleave_0"), val = tensor(false)]; tensor var_1796_cast_fp16 = concat(axis = var_999, interleave = var_1796_interleave_0, values = (var_1692_cast_fp16, var_1694_cast_fp16, var_1696_cast_fp16, var_1698_cast_fp16, var_1700_cast_fp16, var_1702_cast_fp16))[name = tensor("op_1796_cast_fp16")]; tensor var_1798_interleave_0 = const()[name = tensor("op_1798_interleave_0"), val = tensor(false)]; tensor var_1798_cast_fp16 = concat(axis = var_999, interleave = var_1798_interleave_0, values = (var_1704_cast_fp16, var_1706_cast_fp16, var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16))[name = tensor("op_1798_cast_fp16")]; tensor var_1800_interleave_0 = const()[name = tensor("op_1800_interleave_0"), val = tensor(false)]; tensor var_1800_cast_fp16 = concat(axis = var_999, interleave = var_1800_interleave_0, values = (var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16))[name = tensor("op_1800_cast_fp16")]; tensor var_1802_interleave_0 = const()[name = tensor("op_1802_interleave_0"), val = tensor(false)]; tensor var_1802_cast_fp16 = concat(axis = var_999, interleave = var_1802_interleave_0, values = (var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16))[name = tensor("op_1802_cast_fp16")]; tensor var_1804_interleave_0 = const()[name = tensor("op_1804_interleave_0"), val = tensor(false)]; tensor var_1804_cast_fp16 = concat(axis = var_999, interleave = var_1804_interleave_0, values = (var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16))[name = tensor("op_1804_cast_fp16")]; tensor var_1806_interleave_0 = const()[name = tensor("op_1806_interleave_0"), val = tensor(false)]; tensor var_1806_cast_fp16 = concat(axis = var_999, interleave = var_1806_interleave_0, values = (var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16, var_1762_cast_fp16))[name = tensor("op_1806_cast_fp16")]; tensor var_1808_interleave_0 = const()[name = tensor("op_1808_interleave_0"), val = tensor(false)]; tensor var_1808_cast_fp16 = concat(axis = var_999, interleave = var_1808_interleave_0, values = (var_1764_cast_fp16, var_1766_cast_fp16, var_1768_cast_fp16, var_1770_cast_fp16, var_1772_cast_fp16, var_1774_cast_fp16))[name = tensor("op_1808_cast_fp16")]; tensor var_1810_interleave_0 = const()[name = tensor("op_1810_interleave_0"), val = tensor(false)]; tensor var_1810_cast_fp16 = concat(axis = var_999, interleave = var_1810_interleave_0, values = (var_1776_cast_fp16, var_1778_cast_fp16, var_1780_cast_fp16, var_1782_cast_fp16, var_1784_cast_fp16, var_1786_cast_fp16))[name = tensor("op_1810_cast_fp16")]; tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; tensor input_9_cast_fp16 = concat(axis = var_1015, interleave = input_9_interleave_0, values = (var_1788_cast_fp16, var_1790_cast_fp16, var_1792_cast_fp16, var_1794_cast_fp16, var_1796_cast_fp16, var_1798_cast_fp16, var_1800_cast_fp16, var_1802_cast_fp16, var_1804_cast_fp16, var_1806_cast_fp16, var_1808_cast_fp16, var_1810_cast_fp16))[name = tensor("input_9_cast_fp16")]; tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("valid")]; tensor obj_7_strides_0 = const()[name = tensor("obj_7_strides_0"), val = tensor([1, 1])]; tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_7_dilations_0 = const()[name = tensor("obj_7_dilations_0"), val = tensor([1, 1])]; tensor obj_7_groups_0 = const()[name = tensor("obj_7_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23938944)))]; tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25118656)))]; tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; tensor var_1829_to_fp16 = const()[name = tensor("op_1829_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_1829_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25120256)))]; tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25121856)))]; tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("valid")]; tensor input_13_strides_0 = const()[name = tensor("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = tensor("input_13_dilations_0"), val = tensor([1, 1])]; tensor input_13_groups_0 = const()[name = tensor("input_13_groups_0"), val = tensor(1)]; tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25123456)))]; tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29842112)))]; tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("valid")]; tensor hidden_states_7_strides_0 = const()[name = tensor("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = tensor("hidden_states_7_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_7_groups_0 = const()[name = tensor("hidden_states_7_groups_0"), val = tensor(1)]; tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29848320)))]; tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34566976)))]; tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; tensor var_1861 = const()[name = tensor("op_1861"), val = tensor(3)]; tensor var_1877 = const()[name = tensor("op_1877"), val = tensor(1)]; tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; tensor var_1894_to_fp16 = const()[name = tensor("op_1894_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_1894_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34568576)))]; tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34570176)))]; tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("valid")]; tensor query_5_strides_0 = const()[name = tensor("query_5_strides_0"), val = tensor([1, 1])]; tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_5_dilations_0 = const()[name = tensor("query_5_dilations_0"), val = tensor([1, 1])]; tensor query_5_groups_0 = const()[name = tensor("query_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34571776)))]; tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35751488)))]; tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("valid")]; tensor key_5_strides_0 = const()[name = tensor("key_5_strides_0"), val = tensor([1, 1])]; tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_5_dilations_0 = const()[name = tensor("key_5_dilations_0"), val = tensor([1, 1])]; tensor key_5_groups_0 = const()[name = tensor("key_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35753088)))]; tensor key_5_cast_fp16 = conv(dilations = key_5_dilations_0, groups = key_5_groups_0, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("valid")]; tensor value_5_strides_0 = const()[name = tensor("value_5_strides_0"), val = tensor([1, 1])]; tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_5_dilations_0 = const()[name = tensor("value_5_dilations_0"), val = tensor([1, 1])]; tensor value_5_groups_0 = const()[name = tensor("value_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36932800)))]; tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38112512)))]; tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = value_5_dilations_0, groups = value_5_groups_0, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; tensor var_1929_begin_0 = const()[name = tensor("op_1929_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1929_end_0 = const()[name = tensor("op_1929_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1929_end_mask_0 = const()[name = tensor("op_1929_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1929_cast_fp16 = slice_by_index(begin = var_1929_begin_0, end = var_1929_end_0, end_mask = var_1929_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1929_cast_fp16")]; tensor var_1933_begin_0 = const()[name = tensor("op_1933_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1933_end_0 = const()[name = tensor("op_1933_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1933_end_mask_0 = const()[name = tensor("op_1933_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1933_cast_fp16 = slice_by_index(begin = var_1933_begin_0, end = var_1933_end_0, end_mask = var_1933_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1933_cast_fp16")]; tensor var_1937_begin_0 = const()[name = tensor("op_1937_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1937_end_0 = const()[name = tensor("op_1937_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1937_end_mask_0 = const()[name = tensor("op_1937_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1937_cast_fp16 = slice_by_index(begin = var_1937_begin_0, end = var_1937_end_0, end_mask = var_1937_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1937_cast_fp16")]; tensor var_1941_begin_0 = const()[name = tensor("op_1941_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1941_end_0 = const()[name = tensor("op_1941_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1941_end_mask_0 = const()[name = tensor("op_1941_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1941_cast_fp16 = slice_by_index(begin = var_1941_begin_0, end = var_1941_end_0, end_mask = var_1941_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1941_cast_fp16")]; tensor var_1945_begin_0 = const()[name = tensor("op_1945_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1945_end_0 = const()[name = tensor("op_1945_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1945_end_mask_0 = const()[name = tensor("op_1945_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1945_cast_fp16 = slice_by_index(begin = var_1945_begin_0, end = var_1945_end_0, end_mask = var_1945_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1945_cast_fp16")]; tensor var_1949_begin_0 = const()[name = tensor("op_1949_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1949_end_0 = const()[name = tensor("op_1949_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1949_end_mask_0 = const()[name = tensor("op_1949_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1949_cast_fp16 = slice_by_index(begin = var_1949_begin_0, end = var_1949_end_0, end_mask = var_1949_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1949_cast_fp16")]; tensor var_1953_begin_0 = const()[name = tensor("op_1953_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1953_end_0 = const()[name = tensor("op_1953_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1953_end_mask_0 = const()[name = tensor("op_1953_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1953_cast_fp16 = slice_by_index(begin = var_1953_begin_0, end = var_1953_end_0, end_mask = var_1953_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1953_cast_fp16")]; tensor var_1957_begin_0 = const()[name = tensor("op_1957_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1957_end_0 = const()[name = tensor("op_1957_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_1957_end_mask_0 = const()[name = tensor("op_1957_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1957_cast_fp16 = slice_by_index(begin = var_1957_begin_0, end = var_1957_end_0, end_mask = var_1957_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1957_cast_fp16")]; tensor var_1961_begin_0 = const()[name = tensor("op_1961_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_1961_end_0 = const()[name = tensor("op_1961_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_1961_end_mask_0 = const()[name = tensor("op_1961_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1961_cast_fp16 = slice_by_index(begin = var_1961_begin_0, end = var_1961_end_0, end_mask = var_1961_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1961_cast_fp16")]; tensor var_1965_begin_0 = const()[name = tensor("op_1965_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_1965_end_0 = const()[name = tensor("op_1965_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_1965_end_mask_0 = const()[name = tensor("op_1965_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1965_cast_fp16 = slice_by_index(begin = var_1965_begin_0, end = var_1965_end_0, end_mask = var_1965_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1965_cast_fp16")]; tensor var_1969_begin_0 = const()[name = tensor("op_1969_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_1969_end_0 = const()[name = tensor("op_1969_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_1969_end_mask_0 = const()[name = tensor("op_1969_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1969_cast_fp16 = slice_by_index(begin = var_1969_begin_0, end = var_1969_end_0, end_mask = var_1969_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1969_cast_fp16")]; tensor var_1973_begin_0 = const()[name = tensor("op_1973_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_1973_end_0 = const()[name = tensor("op_1973_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1973_end_mask_0 = const()[name = tensor("op_1973_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1973_cast_fp16 = slice_by_index(begin = var_1973_begin_0, end = var_1973_end_0, end_mask = var_1973_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1973_cast_fp16")]; tensor var_1976_begin_0 = const()[name = tensor("op_1976_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1976_end_0 = const()[name = tensor("op_1976_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1976_end_mask_0 = const()[name = tensor("op_1976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1976_cast_fp16 = slice_by_index(begin = var_1976_begin_0, end = var_1976_end_0, end_mask = var_1976_end_mask_0, x = var_1929_cast_fp16)[name = tensor("op_1976_cast_fp16")]; tensor var_1977_begin_0 = const()[name = tensor("op_1977_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1977_end_0 = const()[name = tensor("op_1977_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1977_end_mask_0 = const()[name = tensor("op_1977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1977_cast_fp16 = slice_by_index(begin = var_1977_begin_0, end = var_1977_end_0, end_mask = var_1977_end_mask_0, x = var_1929_cast_fp16)[name = tensor("op_1977_cast_fp16")]; tensor var_1978_begin_0 = const()[name = tensor("op_1978_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1978_end_0 = const()[name = tensor("op_1978_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1978_end_mask_0 = const()[name = tensor("op_1978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1978_cast_fp16 = slice_by_index(begin = var_1978_begin_0, end = var_1978_end_0, end_mask = var_1978_end_mask_0, x = var_1929_cast_fp16)[name = tensor("op_1978_cast_fp16")]; tensor var_1979_begin_0 = const()[name = tensor("op_1979_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1979_end_0 = const()[name = tensor("op_1979_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1979_end_mask_0 = const()[name = tensor("op_1979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1979_cast_fp16 = slice_by_index(begin = var_1979_begin_0, end = var_1979_end_0, end_mask = var_1979_end_mask_0, x = var_1929_cast_fp16)[name = tensor("op_1979_cast_fp16")]; tensor var_1980_begin_0 = const()[name = tensor("op_1980_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1980_end_0 = const()[name = tensor("op_1980_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1980_end_mask_0 = const()[name = tensor("op_1980_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1980_cast_fp16 = slice_by_index(begin = var_1980_begin_0, end = var_1980_end_0, end_mask = var_1980_end_mask_0, x = var_1929_cast_fp16)[name = tensor("op_1980_cast_fp16")]; tensor var_1981_begin_0 = const()[name = tensor("op_1981_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1981_end_0 = const()[name = tensor("op_1981_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1981_end_mask_0 = const()[name = tensor("op_1981_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1981_cast_fp16 = slice_by_index(begin = var_1981_begin_0, end = var_1981_end_0, end_mask = var_1981_end_mask_0, x = var_1929_cast_fp16)[name = tensor("op_1981_cast_fp16")]; tensor var_1982_begin_0 = const()[name = tensor("op_1982_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1982_end_0 = const()[name = tensor("op_1982_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1982_end_mask_0 = const()[name = tensor("op_1982_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1982_cast_fp16 = slice_by_index(begin = var_1982_begin_0, end = var_1982_end_0, end_mask = var_1982_end_mask_0, x = var_1933_cast_fp16)[name = tensor("op_1982_cast_fp16")]; tensor var_1983_begin_0 = const()[name = tensor("op_1983_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1983_end_0 = const()[name = tensor("op_1983_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1983_end_mask_0 = const()[name = tensor("op_1983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1983_cast_fp16 = slice_by_index(begin = var_1983_begin_0, end = var_1983_end_0, end_mask = var_1983_end_mask_0, x = var_1933_cast_fp16)[name = tensor("op_1983_cast_fp16")]; tensor var_1984_begin_0 = const()[name = tensor("op_1984_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1984_end_0 = const()[name = tensor("op_1984_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1984_end_mask_0 = const()[name = tensor("op_1984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1984_cast_fp16 = slice_by_index(begin = var_1984_begin_0, end = var_1984_end_0, end_mask = var_1984_end_mask_0, x = var_1933_cast_fp16)[name = tensor("op_1984_cast_fp16")]; tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = var_1933_cast_fp16)[name = tensor("op_1985_cast_fp16")]; tensor var_1986_begin_0 = const()[name = tensor("op_1986_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1986_end_0 = const()[name = tensor("op_1986_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1986_end_mask_0 = const()[name = tensor("op_1986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1986_cast_fp16 = slice_by_index(begin = var_1986_begin_0, end = var_1986_end_0, end_mask = var_1986_end_mask_0, x = var_1933_cast_fp16)[name = tensor("op_1986_cast_fp16")]; tensor var_1987_begin_0 = const()[name = tensor("op_1987_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1987_end_0 = const()[name = tensor("op_1987_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1987_end_mask_0 = const()[name = tensor("op_1987_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1987_cast_fp16 = slice_by_index(begin = var_1987_begin_0, end = var_1987_end_0, end_mask = var_1987_end_mask_0, x = var_1933_cast_fp16)[name = tensor("op_1987_cast_fp16")]; tensor var_1988_begin_0 = const()[name = tensor("op_1988_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1988_end_0 = const()[name = tensor("op_1988_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1988_end_mask_0 = const()[name = tensor("op_1988_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1988_cast_fp16 = slice_by_index(begin = var_1988_begin_0, end = var_1988_end_0, end_mask = var_1988_end_mask_0, x = var_1937_cast_fp16)[name = tensor("op_1988_cast_fp16")]; tensor var_1989_begin_0 = const()[name = tensor("op_1989_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1989_end_0 = const()[name = tensor("op_1989_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1989_end_mask_0 = const()[name = tensor("op_1989_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1989_cast_fp16 = slice_by_index(begin = var_1989_begin_0, end = var_1989_end_0, end_mask = var_1989_end_mask_0, x = var_1937_cast_fp16)[name = tensor("op_1989_cast_fp16")]; tensor var_1990_begin_0 = const()[name = tensor("op_1990_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1990_end_0 = const()[name = tensor("op_1990_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1990_end_mask_0 = const()[name = tensor("op_1990_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1990_cast_fp16 = slice_by_index(begin = var_1990_begin_0, end = var_1990_end_0, end_mask = var_1990_end_mask_0, x = var_1937_cast_fp16)[name = tensor("op_1990_cast_fp16")]; tensor var_1991_begin_0 = const()[name = tensor("op_1991_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1991_end_0 = const()[name = tensor("op_1991_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1991_end_mask_0 = const()[name = tensor("op_1991_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1991_cast_fp16 = slice_by_index(begin = var_1991_begin_0, end = var_1991_end_0, end_mask = var_1991_end_mask_0, x = var_1937_cast_fp16)[name = tensor("op_1991_cast_fp16")]; tensor var_1992_begin_0 = const()[name = tensor("op_1992_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1992_end_0 = const()[name = tensor("op_1992_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1992_end_mask_0 = const()[name = tensor("op_1992_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1992_cast_fp16 = slice_by_index(begin = var_1992_begin_0, end = var_1992_end_0, end_mask = var_1992_end_mask_0, x = var_1937_cast_fp16)[name = tensor("op_1992_cast_fp16")]; tensor var_1993_begin_0 = const()[name = tensor("op_1993_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1993_end_0 = const()[name = tensor("op_1993_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1993_end_mask_0 = const()[name = tensor("op_1993_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1993_cast_fp16 = slice_by_index(begin = var_1993_begin_0, end = var_1993_end_0, end_mask = var_1993_end_mask_0, x = var_1937_cast_fp16)[name = tensor("op_1993_cast_fp16")]; tensor var_1994_begin_0 = const()[name = tensor("op_1994_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1994_end_0 = const()[name = tensor("op_1994_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1994_end_mask_0 = const()[name = tensor("op_1994_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1994_cast_fp16 = slice_by_index(begin = var_1994_begin_0, end = var_1994_end_0, end_mask = var_1994_end_mask_0, x = var_1941_cast_fp16)[name = tensor("op_1994_cast_fp16")]; tensor var_1995_begin_0 = const()[name = tensor("op_1995_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1995_end_0 = const()[name = tensor("op_1995_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1995_end_mask_0 = const()[name = tensor("op_1995_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1995_cast_fp16 = slice_by_index(begin = var_1995_begin_0, end = var_1995_end_0, end_mask = var_1995_end_mask_0, x = var_1941_cast_fp16)[name = tensor("op_1995_cast_fp16")]; tensor var_1996_begin_0 = const()[name = tensor("op_1996_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1996_end_0 = const()[name = tensor("op_1996_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1996_end_mask_0 = const()[name = tensor("op_1996_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1996_cast_fp16 = slice_by_index(begin = var_1996_begin_0, end = var_1996_end_0, end_mask = var_1996_end_mask_0, x = var_1941_cast_fp16)[name = tensor("op_1996_cast_fp16")]; tensor var_1997_begin_0 = const()[name = tensor("op_1997_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1997_end_0 = const()[name = tensor("op_1997_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1997_end_mask_0 = const()[name = tensor("op_1997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1997_cast_fp16 = slice_by_index(begin = var_1997_begin_0, end = var_1997_end_0, end_mask = var_1997_end_mask_0, x = var_1941_cast_fp16)[name = tensor("op_1997_cast_fp16")]; tensor var_1998_begin_0 = const()[name = tensor("op_1998_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1998_end_0 = const()[name = tensor("op_1998_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1998_end_mask_0 = const()[name = tensor("op_1998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1998_cast_fp16 = slice_by_index(begin = var_1998_begin_0, end = var_1998_end_0, end_mask = var_1998_end_mask_0, x = var_1941_cast_fp16)[name = tensor("op_1998_cast_fp16")]; tensor var_1999_begin_0 = const()[name = tensor("op_1999_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1999_end_0 = const()[name = tensor("op_1999_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1999_end_mask_0 = const()[name = tensor("op_1999_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1999_cast_fp16 = slice_by_index(begin = var_1999_begin_0, end = var_1999_end_0, end_mask = var_1999_end_mask_0, x = var_1941_cast_fp16)[name = tensor("op_1999_cast_fp16")]; tensor var_2000_begin_0 = const()[name = tensor("op_2000_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2000_end_0 = const()[name = tensor("op_2000_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2000_end_mask_0 = const()[name = tensor("op_2000_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2000_cast_fp16 = slice_by_index(begin = var_2000_begin_0, end = var_2000_end_0, end_mask = var_2000_end_mask_0, x = var_1945_cast_fp16)[name = tensor("op_2000_cast_fp16")]; tensor var_2001_begin_0 = const()[name = tensor("op_2001_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2001_end_0 = const()[name = tensor("op_2001_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2001_end_mask_0 = const()[name = tensor("op_2001_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2001_cast_fp16 = slice_by_index(begin = var_2001_begin_0, end = var_2001_end_0, end_mask = var_2001_end_mask_0, x = var_1945_cast_fp16)[name = tensor("op_2001_cast_fp16")]; tensor var_2002_begin_0 = const()[name = tensor("op_2002_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2002_end_0 = const()[name = tensor("op_2002_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2002_end_mask_0 = const()[name = tensor("op_2002_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2002_cast_fp16 = slice_by_index(begin = var_2002_begin_0, end = var_2002_end_0, end_mask = var_2002_end_mask_0, x = var_1945_cast_fp16)[name = tensor("op_2002_cast_fp16")]; tensor var_2003_begin_0 = const()[name = tensor("op_2003_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2003_end_0 = const()[name = tensor("op_2003_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2003_end_mask_0 = const()[name = tensor("op_2003_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2003_cast_fp16 = slice_by_index(begin = var_2003_begin_0, end = var_2003_end_0, end_mask = var_2003_end_mask_0, x = var_1945_cast_fp16)[name = tensor("op_2003_cast_fp16")]; tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = var_1945_cast_fp16)[name = tensor("op_2004_cast_fp16")]; tensor var_2005_begin_0 = const()[name = tensor("op_2005_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2005_end_0 = const()[name = tensor("op_2005_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2005_end_mask_0 = const()[name = tensor("op_2005_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2005_cast_fp16 = slice_by_index(begin = var_2005_begin_0, end = var_2005_end_0, end_mask = var_2005_end_mask_0, x = var_1945_cast_fp16)[name = tensor("op_2005_cast_fp16")]; tensor var_2006_begin_0 = const()[name = tensor("op_2006_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2006_end_0 = const()[name = tensor("op_2006_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2006_end_mask_0 = const()[name = tensor("op_2006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2006_cast_fp16 = slice_by_index(begin = var_2006_begin_0, end = var_2006_end_0, end_mask = var_2006_end_mask_0, x = var_1949_cast_fp16)[name = tensor("op_2006_cast_fp16")]; tensor var_2007_begin_0 = const()[name = tensor("op_2007_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2007_end_0 = const()[name = tensor("op_2007_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2007_end_mask_0 = const()[name = tensor("op_2007_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2007_cast_fp16 = slice_by_index(begin = var_2007_begin_0, end = var_2007_end_0, end_mask = var_2007_end_mask_0, x = var_1949_cast_fp16)[name = tensor("op_2007_cast_fp16")]; tensor var_2008_begin_0 = const()[name = tensor("op_2008_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2008_end_0 = const()[name = tensor("op_2008_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2008_end_mask_0 = const()[name = tensor("op_2008_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2008_cast_fp16 = slice_by_index(begin = var_2008_begin_0, end = var_2008_end_0, end_mask = var_2008_end_mask_0, x = var_1949_cast_fp16)[name = tensor("op_2008_cast_fp16")]; tensor var_2009_begin_0 = const()[name = tensor("op_2009_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2009_end_0 = const()[name = tensor("op_2009_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2009_end_mask_0 = const()[name = tensor("op_2009_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2009_cast_fp16 = slice_by_index(begin = var_2009_begin_0, end = var_2009_end_0, end_mask = var_2009_end_mask_0, x = var_1949_cast_fp16)[name = tensor("op_2009_cast_fp16")]; tensor var_2010_begin_0 = const()[name = tensor("op_2010_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2010_end_0 = const()[name = tensor("op_2010_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2010_end_mask_0 = const()[name = tensor("op_2010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2010_cast_fp16 = slice_by_index(begin = var_2010_begin_0, end = var_2010_end_0, end_mask = var_2010_end_mask_0, x = var_1949_cast_fp16)[name = tensor("op_2010_cast_fp16")]; tensor var_2011_begin_0 = const()[name = tensor("op_2011_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2011_end_0 = const()[name = tensor("op_2011_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2011_end_mask_0 = const()[name = tensor("op_2011_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2011_cast_fp16 = slice_by_index(begin = var_2011_begin_0, end = var_2011_end_0, end_mask = var_2011_end_mask_0, x = var_1949_cast_fp16)[name = tensor("op_2011_cast_fp16")]; tensor var_2012_begin_0 = const()[name = tensor("op_2012_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2012_end_0 = const()[name = tensor("op_2012_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2012_end_mask_0 = const()[name = tensor("op_2012_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2012_cast_fp16 = slice_by_index(begin = var_2012_begin_0, end = var_2012_end_0, end_mask = var_2012_end_mask_0, x = var_1953_cast_fp16)[name = tensor("op_2012_cast_fp16")]; tensor var_2013_begin_0 = const()[name = tensor("op_2013_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2013_end_0 = const()[name = tensor("op_2013_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2013_end_mask_0 = const()[name = tensor("op_2013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2013_cast_fp16 = slice_by_index(begin = var_2013_begin_0, end = var_2013_end_0, end_mask = var_2013_end_mask_0, x = var_1953_cast_fp16)[name = tensor("op_2013_cast_fp16")]; tensor var_2014_begin_0 = const()[name = tensor("op_2014_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2014_end_0 = const()[name = tensor("op_2014_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2014_end_mask_0 = const()[name = tensor("op_2014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2014_cast_fp16 = slice_by_index(begin = var_2014_begin_0, end = var_2014_end_0, end_mask = var_2014_end_mask_0, x = var_1953_cast_fp16)[name = tensor("op_2014_cast_fp16")]; tensor var_2015_begin_0 = const()[name = tensor("op_2015_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2015_end_0 = const()[name = tensor("op_2015_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2015_end_mask_0 = const()[name = tensor("op_2015_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2015_cast_fp16 = slice_by_index(begin = var_2015_begin_0, end = var_2015_end_0, end_mask = var_2015_end_mask_0, x = var_1953_cast_fp16)[name = tensor("op_2015_cast_fp16")]; tensor var_2016_begin_0 = const()[name = tensor("op_2016_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2016_end_0 = const()[name = tensor("op_2016_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2016_end_mask_0 = const()[name = tensor("op_2016_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2016_cast_fp16 = slice_by_index(begin = var_2016_begin_0, end = var_2016_end_0, end_mask = var_2016_end_mask_0, x = var_1953_cast_fp16)[name = tensor("op_2016_cast_fp16")]; tensor var_2017_begin_0 = const()[name = tensor("op_2017_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2017_end_0 = const()[name = tensor("op_2017_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2017_end_mask_0 = const()[name = tensor("op_2017_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2017_cast_fp16 = slice_by_index(begin = var_2017_begin_0, end = var_2017_end_0, end_mask = var_2017_end_mask_0, x = var_1953_cast_fp16)[name = tensor("op_2017_cast_fp16")]; tensor var_2018_begin_0 = const()[name = tensor("op_2018_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2018_end_0 = const()[name = tensor("op_2018_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2018_end_mask_0 = const()[name = tensor("op_2018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2018_cast_fp16 = slice_by_index(begin = var_2018_begin_0, end = var_2018_end_0, end_mask = var_2018_end_mask_0, x = var_1957_cast_fp16)[name = tensor("op_2018_cast_fp16")]; tensor var_2019_begin_0 = const()[name = tensor("op_2019_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2019_end_0 = const()[name = tensor("op_2019_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2019_end_mask_0 = const()[name = tensor("op_2019_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2019_cast_fp16 = slice_by_index(begin = var_2019_begin_0, end = var_2019_end_0, end_mask = var_2019_end_mask_0, x = var_1957_cast_fp16)[name = tensor("op_2019_cast_fp16")]; tensor var_2020_begin_0 = const()[name = tensor("op_2020_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2020_end_0 = const()[name = tensor("op_2020_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2020_end_mask_0 = const()[name = tensor("op_2020_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2020_cast_fp16 = slice_by_index(begin = var_2020_begin_0, end = var_2020_end_0, end_mask = var_2020_end_mask_0, x = var_1957_cast_fp16)[name = tensor("op_2020_cast_fp16")]; tensor var_2021_begin_0 = const()[name = tensor("op_2021_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2021_end_0 = const()[name = tensor("op_2021_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2021_end_mask_0 = const()[name = tensor("op_2021_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2021_cast_fp16 = slice_by_index(begin = var_2021_begin_0, end = var_2021_end_0, end_mask = var_2021_end_mask_0, x = var_1957_cast_fp16)[name = tensor("op_2021_cast_fp16")]; tensor var_2022_begin_0 = const()[name = tensor("op_2022_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2022_end_0 = const()[name = tensor("op_2022_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2022_end_mask_0 = const()[name = tensor("op_2022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2022_cast_fp16 = slice_by_index(begin = var_2022_begin_0, end = var_2022_end_0, end_mask = var_2022_end_mask_0, x = var_1957_cast_fp16)[name = tensor("op_2022_cast_fp16")]; tensor var_2023_begin_0 = const()[name = tensor("op_2023_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2023_end_0 = const()[name = tensor("op_2023_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2023_end_mask_0 = const()[name = tensor("op_2023_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2023_cast_fp16 = slice_by_index(begin = var_2023_begin_0, end = var_2023_end_0, end_mask = var_2023_end_mask_0, x = var_1957_cast_fp16)[name = tensor("op_2023_cast_fp16")]; tensor var_2024_begin_0 = const()[name = tensor("op_2024_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2024_end_0 = const()[name = tensor("op_2024_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2024_end_mask_0 = const()[name = tensor("op_2024_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2024_cast_fp16 = slice_by_index(begin = var_2024_begin_0, end = var_2024_end_0, end_mask = var_2024_end_mask_0, x = var_1961_cast_fp16)[name = tensor("op_2024_cast_fp16")]; tensor var_2025_begin_0 = const()[name = tensor("op_2025_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2025_end_0 = const()[name = tensor("op_2025_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2025_end_mask_0 = const()[name = tensor("op_2025_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2025_cast_fp16 = slice_by_index(begin = var_2025_begin_0, end = var_2025_end_0, end_mask = var_2025_end_mask_0, x = var_1961_cast_fp16)[name = tensor("op_2025_cast_fp16")]; tensor var_2026_begin_0 = const()[name = tensor("op_2026_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2026_end_0 = const()[name = tensor("op_2026_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2026_end_mask_0 = const()[name = tensor("op_2026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2026_cast_fp16 = slice_by_index(begin = var_2026_begin_0, end = var_2026_end_0, end_mask = var_2026_end_mask_0, x = var_1961_cast_fp16)[name = tensor("op_2026_cast_fp16")]; tensor var_2027_begin_0 = const()[name = tensor("op_2027_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2027_end_0 = const()[name = tensor("op_2027_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2027_end_mask_0 = const()[name = tensor("op_2027_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2027_cast_fp16 = slice_by_index(begin = var_2027_begin_0, end = var_2027_end_0, end_mask = var_2027_end_mask_0, x = var_1961_cast_fp16)[name = tensor("op_2027_cast_fp16")]; tensor var_2028_begin_0 = const()[name = tensor("op_2028_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2028_end_0 = const()[name = tensor("op_2028_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2028_end_mask_0 = const()[name = tensor("op_2028_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2028_cast_fp16 = slice_by_index(begin = var_2028_begin_0, end = var_2028_end_0, end_mask = var_2028_end_mask_0, x = var_1961_cast_fp16)[name = tensor("op_2028_cast_fp16")]; tensor var_2029_begin_0 = const()[name = tensor("op_2029_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2029_end_0 = const()[name = tensor("op_2029_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2029_end_mask_0 = const()[name = tensor("op_2029_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2029_cast_fp16 = slice_by_index(begin = var_2029_begin_0, end = var_2029_end_0, end_mask = var_2029_end_mask_0, x = var_1961_cast_fp16)[name = tensor("op_2029_cast_fp16")]; tensor var_2030_begin_0 = const()[name = tensor("op_2030_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2030_end_0 = const()[name = tensor("op_2030_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2030_end_mask_0 = const()[name = tensor("op_2030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2030_cast_fp16 = slice_by_index(begin = var_2030_begin_0, end = var_2030_end_0, end_mask = var_2030_end_mask_0, x = var_1965_cast_fp16)[name = tensor("op_2030_cast_fp16")]; tensor var_2031_begin_0 = const()[name = tensor("op_2031_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2031_end_0 = const()[name = tensor("op_2031_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2031_end_mask_0 = const()[name = tensor("op_2031_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2031_cast_fp16 = slice_by_index(begin = var_2031_begin_0, end = var_2031_end_0, end_mask = var_2031_end_mask_0, x = var_1965_cast_fp16)[name = tensor("op_2031_cast_fp16")]; tensor var_2032_begin_0 = const()[name = tensor("op_2032_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2032_end_0 = const()[name = tensor("op_2032_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2032_end_mask_0 = const()[name = tensor("op_2032_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2032_cast_fp16 = slice_by_index(begin = var_2032_begin_0, end = var_2032_end_0, end_mask = var_2032_end_mask_0, x = var_1965_cast_fp16)[name = tensor("op_2032_cast_fp16")]; tensor var_2033_begin_0 = const()[name = tensor("op_2033_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2033_end_0 = const()[name = tensor("op_2033_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2033_end_mask_0 = const()[name = tensor("op_2033_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2033_cast_fp16 = slice_by_index(begin = var_2033_begin_0, end = var_2033_end_0, end_mask = var_2033_end_mask_0, x = var_1965_cast_fp16)[name = tensor("op_2033_cast_fp16")]; tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = var_1965_cast_fp16)[name = tensor("op_2034_cast_fp16")]; tensor var_2035_begin_0 = const()[name = tensor("op_2035_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2035_end_0 = const()[name = tensor("op_2035_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2035_end_mask_0 = const()[name = tensor("op_2035_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2035_cast_fp16 = slice_by_index(begin = var_2035_begin_0, end = var_2035_end_0, end_mask = var_2035_end_mask_0, x = var_1965_cast_fp16)[name = tensor("op_2035_cast_fp16")]; tensor var_2036_begin_0 = const()[name = tensor("op_2036_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2036_end_0 = const()[name = tensor("op_2036_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2036_end_mask_0 = const()[name = tensor("op_2036_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2036_cast_fp16 = slice_by_index(begin = var_2036_begin_0, end = var_2036_end_0, end_mask = var_2036_end_mask_0, x = var_1969_cast_fp16)[name = tensor("op_2036_cast_fp16")]; tensor var_2037_begin_0 = const()[name = tensor("op_2037_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2037_end_0 = const()[name = tensor("op_2037_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2037_end_mask_0 = const()[name = tensor("op_2037_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2037_cast_fp16 = slice_by_index(begin = var_2037_begin_0, end = var_2037_end_0, end_mask = var_2037_end_mask_0, x = var_1969_cast_fp16)[name = tensor("op_2037_cast_fp16")]; tensor var_2038_begin_0 = const()[name = tensor("op_2038_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2038_end_0 = const()[name = tensor("op_2038_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2038_end_mask_0 = const()[name = tensor("op_2038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2038_cast_fp16 = slice_by_index(begin = var_2038_begin_0, end = var_2038_end_0, end_mask = var_2038_end_mask_0, x = var_1969_cast_fp16)[name = tensor("op_2038_cast_fp16")]; tensor var_2039_begin_0 = const()[name = tensor("op_2039_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2039_end_0 = const()[name = tensor("op_2039_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2039_end_mask_0 = const()[name = tensor("op_2039_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2039_cast_fp16 = slice_by_index(begin = var_2039_begin_0, end = var_2039_end_0, end_mask = var_2039_end_mask_0, x = var_1969_cast_fp16)[name = tensor("op_2039_cast_fp16")]; tensor var_2040_begin_0 = const()[name = tensor("op_2040_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2040_end_0 = const()[name = tensor("op_2040_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2040_end_mask_0 = const()[name = tensor("op_2040_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2040_cast_fp16 = slice_by_index(begin = var_2040_begin_0, end = var_2040_end_0, end_mask = var_2040_end_mask_0, x = var_1969_cast_fp16)[name = tensor("op_2040_cast_fp16")]; tensor var_2041_begin_0 = const()[name = tensor("op_2041_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2041_end_0 = const()[name = tensor("op_2041_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2041_end_mask_0 = const()[name = tensor("op_2041_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2041_cast_fp16 = slice_by_index(begin = var_2041_begin_0, end = var_2041_end_0, end_mask = var_2041_end_mask_0, x = var_1969_cast_fp16)[name = tensor("op_2041_cast_fp16")]; tensor var_2042_begin_0 = const()[name = tensor("op_2042_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2042_end_0 = const()[name = tensor("op_2042_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2042_end_mask_0 = const()[name = tensor("op_2042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2042_cast_fp16 = slice_by_index(begin = var_2042_begin_0, end = var_2042_end_0, end_mask = var_2042_end_mask_0, x = var_1973_cast_fp16)[name = tensor("op_2042_cast_fp16")]; tensor var_2043_begin_0 = const()[name = tensor("op_2043_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2043_end_0 = const()[name = tensor("op_2043_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2043_end_mask_0 = const()[name = tensor("op_2043_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2043_cast_fp16 = slice_by_index(begin = var_2043_begin_0, end = var_2043_end_0, end_mask = var_2043_end_mask_0, x = var_1973_cast_fp16)[name = tensor("op_2043_cast_fp16")]; tensor var_2044_begin_0 = const()[name = tensor("op_2044_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2044_end_0 = const()[name = tensor("op_2044_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2044_end_mask_0 = const()[name = tensor("op_2044_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2044_cast_fp16 = slice_by_index(begin = var_2044_begin_0, end = var_2044_end_0, end_mask = var_2044_end_mask_0, x = var_1973_cast_fp16)[name = tensor("op_2044_cast_fp16")]; tensor var_2045_begin_0 = const()[name = tensor("op_2045_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2045_end_0 = const()[name = tensor("op_2045_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2045_end_mask_0 = const()[name = tensor("op_2045_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2045_cast_fp16 = slice_by_index(begin = var_2045_begin_0, end = var_2045_end_0, end_mask = var_2045_end_mask_0, x = var_1973_cast_fp16)[name = tensor("op_2045_cast_fp16")]; tensor var_2046_begin_0 = const()[name = tensor("op_2046_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2046_end_0 = const()[name = tensor("op_2046_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2046_end_mask_0 = const()[name = tensor("op_2046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2046_cast_fp16 = slice_by_index(begin = var_2046_begin_0, end = var_2046_end_0, end_mask = var_2046_end_mask_0, x = var_1973_cast_fp16)[name = tensor("op_2046_cast_fp16")]; tensor var_2047_begin_0 = const()[name = tensor("op_2047_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2047_end_0 = const()[name = tensor("op_2047_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2047_end_mask_0 = const()[name = tensor("op_2047_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2047_cast_fp16 = slice_by_index(begin = var_2047_begin_0, end = var_2047_end_0, end_mask = var_2047_end_mask_0, x = var_1973_cast_fp16)[name = tensor("op_2047_cast_fp16")]; tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_2052_begin_0 = const()[name = tensor("op_2052_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2052_end_0 = const()[name = tensor("op_2052_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_2052_end_mask_0 = const()[name = tensor("op_2052_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_9")]; tensor var_2052_cast_fp16 = slice_by_index(begin = var_2052_begin_0, end = var_2052_end_0, end_mask = var_2052_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2052_cast_fp16")]; tensor var_2056_begin_0 = const()[name = tensor("op_2056_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_2056_end_0 = const()[name = tensor("op_2056_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_2056_end_mask_0 = const()[name = tensor("op_2056_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2056_cast_fp16 = slice_by_index(begin = var_2056_begin_0, end = var_2056_end_0, end_mask = var_2056_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2056_cast_fp16")]; tensor var_2060_begin_0 = const()[name = tensor("op_2060_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_2060_end_0 = const()[name = tensor("op_2060_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_2060_end_mask_0 = const()[name = tensor("op_2060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2060_cast_fp16 = slice_by_index(begin = var_2060_begin_0, end = var_2060_end_0, end_mask = var_2060_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2060_cast_fp16")]; tensor var_2064_begin_0 = const()[name = tensor("op_2064_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_2064_end_0 = const()[name = tensor("op_2064_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_2064_end_mask_0 = const()[name = tensor("op_2064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2064_cast_fp16 = slice_by_index(begin = var_2064_begin_0, end = var_2064_end_0, end_mask = var_2064_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2064_cast_fp16")]; tensor var_2068_begin_0 = const()[name = tensor("op_2068_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2068_end_0 = const()[name = tensor("op_2068_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_2068_end_mask_0 = const()[name = tensor("op_2068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2068_cast_fp16 = slice_by_index(begin = var_2068_begin_0, end = var_2068_end_0, end_mask = var_2068_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2068_cast_fp16")]; tensor var_2072_begin_0 = const()[name = tensor("op_2072_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_2072_end_0 = const()[name = tensor("op_2072_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_2072_end_mask_0 = const()[name = tensor("op_2072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2072_cast_fp16 = slice_by_index(begin = var_2072_begin_0, end = var_2072_end_0, end_mask = var_2072_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2072_cast_fp16")]; tensor var_2076_begin_0 = const()[name = tensor("op_2076_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_2076_end_0 = const()[name = tensor("op_2076_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_2076_end_mask_0 = const()[name = tensor("op_2076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2076_cast_fp16 = slice_by_index(begin = var_2076_begin_0, end = var_2076_end_0, end_mask = var_2076_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2076_cast_fp16")]; tensor var_2080_begin_0 = const()[name = tensor("op_2080_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_2080_end_0 = const()[name = tensor("op_2080_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_2080_end_mask_0 = const()[name = tensor("op_2080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2080_cast_fp16 = slice_by_index(begin = var_2080_begin_0, end = var_2080_end_0, end_mask = var_2080_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2080_cast_fp16")]; tensor var_2084_begin_0 = const()[name = tensor("op_2084_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2084_end_0 = const()[name = tensor("op_2084_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_2084_end_mask_0 = const()[name = tensor("op_2084_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2084_cast_fp16 = slice_by_index(begin = var_2084_begin_0, end = var_2084_end_0, end_mask = var_2084_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2084_cast_fp16")]; tensor var_2088_begin_0 = const()[name = tensor("op_2088_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_2088_end_0 = const()[name = tensor("op_2088_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_2088_end_mask_0 = const()[name = tensor("op_2088_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2088_cast_fp16 = slice_by_index(begin = var_2088_begin_0, end = var_2088_end_0, end_mask = var_2088_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2088_cast_fp16")]; tensor var_2092_begin_0 = const()[name = tensor("op_2092_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_2092_end_0 = const()[name = tensor("op_2092_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_2092_end_mask_0 = const()[name = tensor("op_2092_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2092_cast_fp16 = slice_by_index(begin = var_2092_begin_0, end = var_2092_end_0, end_mask = var_2092_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2092_cast_fp16")]; tensor var_2096_begin_0 = const()[name = tensor("op_2096_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_2096_end_0 = const()[name = tensor("op_2096_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_2096_end_mask_0 = const()[name = tensor("op_2096_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2096_cast_fp16 = slice_by_index(begin = var_2096_begin_0, end = var_2096_end_0, end_mask = var_2096_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_2096_cast_fp16")]; tensor var_2098_begin_0 = const()[name = tensor("op_2098_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2098_end_0 = const()[name = tensor("op_2098_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2098_end_mask_0 = const()[name = tensor("op_2098_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2098_cast_fp16 = slice_by_index(begin = var_2098_begin_0, end = var_2098_end_0, end_mask = var_2098_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2098_cast_fp16")]; tensor var_2102_begin_0 = const()[name = tensor("op_2102_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2102_end_0 = const()[name = tensor("op_2102_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2102_end_mask_0 = const()[name = tensor("op_2102_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2102_cast_fp16 = slice_by_index(begin = var_2102_begin_0, end = var_2102_end_0, end_mask = var_2102_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2102_cast_fp16")]; tensor var_2106_begin_0 = const()[name = tensor("op_2106_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2106_end_0 = const()[name = tensor("op_2106_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2106_end_mask_0 = const()[name = tensor("op_2106_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2106_cast_fp16 = slice_by_index(begin = var_2106_begin_0, end = var_2106_end_0, end_mask = var_2106_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2106_cast_fp16")]; tensor var_2110_begin_0 = const()[name = tensor("op_2110_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2110_end_0 = const()[name = tensor("op_2110_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2110_end_mask_0 = const()[name = tensor("op_2110_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2110_cast_fp16 = slice_by_index(begin = var_2110_begin_0, end = var_2110_end_0, end_mask = var_2110_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2110_cast_fp16")]; tensor var_2114_begin_0 = const()[name = tensor("op_2114_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2114_end_0 = const()[name = tensor("op_2114_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2114_end_mask_0 = const()[name = tensor("op_2114_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2114_cast_fp16 = slice_by_index(begin = var_2114_begin_0, end = var_2114_end_0, end_mask = var_2114_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2114_cast_fp16")]; tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2118_cast_fp16")]; tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2122_cast_fp16")]; tensor var_2126_begin_0 = const()[name = tensor("op_2126_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2126_end_0 = const()[name = tensor("op_2126_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_2126_end_mask_0 = const()[name = tensor("op_2126_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2126_cast_fp16 = slice_by_index(begin = var_2126_begin_0, end = var_2126_end_0, end_mask = var_2126_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2126_cast_fp16")]; tensor var_2130_begin_0 = const()[name = tensor("op_2130_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_2130_end_0 = const()[name = tensor("op_2130_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_2130_end_mask_0 = const()[name = tensor("op_2130_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2130_cast_fp16 = slice_by_index(begin = var_2130_begin_0, end = var_2130_end_0, end_mask = var_2130_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2130_cast_fp16")]; tensor var_2134_begin_0 = const()[name = tensor("op_2134_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_2134_end_0 = const()[name = tensor("op_2134_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_2134_end_mask_0 = const()[name = tensor("op_2134_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2134_cast_fp16 = slice_by_index(begin = var_2134_begin_0, end = var_2134_end_0, end_mask = var_2134_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2134_cast_fp16")]; tensor var_2138_begin_0 = const()[name = tensor("op_2138_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_2138_end_0 = const()[name = tensor("op_2138_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_2138_end_mask_0 = const()[name = tensor("op_2138_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2138_cast_fp16 = slice_by_index(begin = var_2138_begin_0, end = var_2138_end_0, end_mask = var_2138_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2138_cast_fp16")]; tensor var_2142_begin_0 = const()[name = tensor("op_2142_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_2142_end_0 = const()[name = tensor("op_2142_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_2142_end_mask_0 = const()[name = tensor("op_2142_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2142_cast_fp16 = slice_by_index(begin = var_2142_begin_0, end = var_2142_end_0, end_mask = var_2142_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_2142_cast_fp16")]; tensor _SplitHeadsQ__mh_w_289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_289_equation_0, values = (var_2052_cast_fp16, var_1976_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_291_equation_0, values = (var_2052_cast_fp16, var_1977_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_293_equation_0, values = (var_2052_cast_fp16, var_1978_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_295_equation_0, values = (var_2052_cast_fp16, var_1979_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_297_equation_0, values = (var_2052_cast_fp16, var_1980_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_299_equation_0, values = (var_2052_cast_fp16, var_1981_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_301_equation_0, values = (var_2056_cast_fp16, var_1982_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_303_equation_0, values = (var_2056_cast_fp16, var_1983_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_305_equation_0, values = (var_2056_cast_fp16, var_1984_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_307_equation_0, values = (var_2056_cast_fp16, var_1985_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_309_equation_0, values = (var_2056_cast_fp16, var_1986_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_311_equation_0, values = (var_2056_cast_fp16, var_1987_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_313_equation_0, values = (var_2060_cast_fp16, var_1988_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_315_equation_0, values = (var_2060_cast_fp16, var_1989_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_317_equation_0, values = (var_2060_cast_fp16, var_1990_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_319_equation_0, values = (var_2060_cast_fp16, var_1991_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_319_cast_fp16")]; tensor _SplitHeadsQ__mh_w_321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_321_equation_0, values = (var_2060_cast_fp16, var_1992_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_323_equation_0, values = (var_2060_cast_fp16, var_1993_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_325_equation_0, values = (var_2064_cast_fp16, var_1994_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_327_equation_0, values = (var_2064_cast_fp16, var_1995_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_329_equation_0, values = (var_2064_cast_fp16, var_1996_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_331_equation_0, values = (var_2064_cast_fp16, var_1997_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_333_equation_0, values = (var_2064_cast_fp16, var_1998_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_335_equation_0, values = (var_2064_cast_fp16, var_1999_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_337_equation_0, values = (var_2068_cast_fp16, var_2000_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_339_equation_0, values = (var_2068_cast_fp16, var_2001_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_341_equation_0, values = (var_2068_cast_fp16, var_2002_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_343_equation_0, values = (var_2068_cast_fp16, var_2003_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_345_equation_0, values = (var_2068_cast_fp16, var_2004_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_347_equation_0, values = (var_2068_cast_fp16, var_2005_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_349_equation_0, values = (var_2072_cast_fp16, var_2006_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_351_equation_0, values = (var_2072_cast_fp16, var_2007_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_353_equation_0, values = (var_2072_cast_fp16, var_2008_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_355_equation_0, values = (var_2072_cast_fp16, var_2009_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_357_equation_0, values = (var_2072_cast_fp16, var_2010_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_359_equation_0, values = (var_2072_cast_fp16, var_2011_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_361_equation_0, values = (var_2076_cast_fp16, var_2012_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_363_equation_0, values = (var_2076_cast_fp16, var_2013_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_365_equation_0, values = (var_2076_cast_fp16, var_2014_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_367_equation_0, values = (var_2076_cast_fp16, var_2015_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_369_equation_0, values = (var_2076_cast_fp16, var_2016_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_371_equation_0, values = (var_2076_cast_fp16, var_2017_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_373_equation_0, values = (var_2080_cast_fp16, var_2018_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_375_equation_0, values = (var_2080_cast_fp16, var_2019_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_377_equation_0, values = (var_2080_cast_fp16, var_2020_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_379_equation_0, values = (var_2080_cast_fp16, var_2021_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_381_equation_0, values = (var_2080_cast_fp16, var_2022_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_383_equation_0, values = (var_2080_cast_fp16, var_2023_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_385_equation_0, values = (var_2084_cast_fp16, var_2024_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_387_equation_0, values = (var_2084_cast_fp16, var_2025_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_389_equation_0, values = (var_2084_cast_fp16, var_2026_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_391_equation_0, values = (var_2084_cast_fp16, var_2027_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_393_equation_0, values = (var_2084_cast_fp16, var_2028_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_395_equation_0, values = (var_2084_cast_fp16, var_2029_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_397_equation_0, values = (var_2088_cast_fp16, var_2030_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_399_equation_0, values = (var_2088_cast_fp16, var_2031_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_401_equation_0, values = (var_2088_cast_fp16, var_2032_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_403_equation_0, values = (var_2088_cast_fp16, var_2033_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_405_equation_0, values = (var_2088_cast_fp16, var_2034_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_407_equation_0, values = (var_2088_cast_fp16, var_2035_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_409_equation_0, values = (var_2092_cast_fp16, var_2036_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_411_equation_0, values = (var_2092_cast_fp16, var_2037_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_413_equation_0, values = (var_2092_cast_fp16, var_2038_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_415_equation_0, values = (var_2092_cast_fp16, var_2039_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_417_equation_0, values = (var_2092_cast_fp16, var_2040_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_419_equation_0, values = (var_2092_cast_fp16, var_2041_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_421_equation_0, values = (var_2096_cast_fp16, var_2042_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_423_equation_0, values = (var_2096_cast_fp16, var_2043_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_425_equation_0, values = (var_2096_cast_fp16, var_2044_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_427_equation_0, values = (var_2096_cast_fp16, var_2045_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_429_equation_0, values = (var_2096_cast_fp16, var_2046_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_431_equation_0, values = (var_2096_cast_fp16, var_2047_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_431_cast_fp16")]; tensor var_2289_to_fp16 = const()[name = tensor("op_2289_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_289_cast_fp16, y = var_2289_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; tensor var_2291_to_fp16 = const()[name = tensor("op_2291_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_291_cast_fp16, y = var_2291_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; tensor var_2293_to_fp16 = const()[name = tensor("op_2293_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_293_cast_fp16, y = var_2293_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; tensor var_2295_to_fp16 = const()[name = tensor("op_2295_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_295_cast_fp16, y = var_2295_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; tensor var_2297_to_fp16 = const()[name = tensor("op_2297_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_297_cast_fp16, y = var_2297_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; tensor var_2299_to_fp16 = const()[name = tensor("op_2299_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_299_cast_fp16, y = var_2299_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; tensor var_2301_to_fp16 = const()[name = tensor("op_2301_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_301_cast_fp16, y = var_2301_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; tensor var_2303_to_fp16 = const()[name = tensor("op_2303_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_303_cast_fp16, y = var_2303_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; tensor var_2305_to_fp16 = const()[name = tensor("op_2305_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_305_cast_fp16, y = var_2305_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; tensor var_2307_to_fp16 = const()[name = tensor("op_2307_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_307_cast_fp16, y = var_2307_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; tensor var_2309_to_fp16 = const()[name = tensor("op_2309_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_309_cast_fp16, y = var_2309_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; tensor var_2311_to_fp16 = const()[name = tensor("op_2311_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_311_cast_fp16, y = var_2311_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; tensor var_2313_to_fp16 = const()[name = tensor("op_2313_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_313_cast_fp16, y = var_2313_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; tensor var_2315_to_fp16 = const()[name = tensor("op_2315_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_315_cast_fp16, y = var_2315_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; tensor var_2317_to_fp16 = const()[name = tensor("op_2317_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_317_cast_fp16, y = var_2317_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; tensor var_2319_to_fp16 = const()[name = tensor("op_2319_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_319_cast_fp16, y = var_2319_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; tensor var_2321_to_fp16 = const()[name = tensor("op_2321_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_321_cast_fp16, y = var_2321_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; tensor var_2323_to_fp16 = const()[name = tensor("op_2323_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_323_cast_fp16, y = var_2323_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; tensor var_2325_to_fp16 = const()[name = tensor("op_2325_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_325_cast_fp16, y = var_2325_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; tensor var_2327_to_fp16 = const()[name = tensor("op_2327_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_327_cast_fp16, y = var_2327_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; tensor var_2329_to_fp16 = const()[name = tensor("op_2329_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_329_cast_fp16, y = var_2329_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; tensor var_2331_to_fp16 = const()[name = tensor("op_2331_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_331_cast_fp16, y = var_2331_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; tensor var_2333_to_fp16 = const()[name = tensor("op_2333_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_333_cast_fp16, y = var_2333_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; tensor var_2335_to_fp16 = const()[name = tensor("op_2335_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_335_cast_fp16, y = var_2335_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; tensor var_2337_to_fp16 = const()[name = tensor("op_2337_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_337_cast_fp16, y = var_2337_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; tensor var_2339_to_fp16 = const()[name = tensor("op_2339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_339_cast_fp16, y = var_2339_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; tensor var_2341_to_fp16 = const()[name = tensor("op_2341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_341_cast_fp16, y = var_2341_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; tensor var_2343_to_fp16 = const()[name = tensor("op_2343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_343_cast_fp16, y = var_2343_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; tensor var_2345_to_fp16 = const()[name = tensor("op_2345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_345_cast_fp16, y = var_2345_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; tensor var_2347_to_fp16 = const()[name = tensor("op_2347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_347_cast_fp16, y = var_2347_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; tensor var_2349_to_fp16 = const()[name = tensor("op_2349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_349_cast_fp16, y = var_2349_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; tensor var_2351_to_fp16 = const()[name = tensor("op_2351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_351_cast_fp16, y = var_2351_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; tensor var_2353_to_fp16 = const()[name = tensor("op_2353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_353_cast_fp16, y = var_2353_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; tensor var_2355_to_fp16 = const()[name = tensor("op_2355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_355_cast_fp16, y = var_2355_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; tensor var_2357_to_fp16 = const()[name = tensor("op_2357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_357_cast_fp16, y = var_2357_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; tensor var_2359_to_fp16 = const()[name = tensor("op_2359_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_359_cast_fp16, y = var_2359_to_fp16)[name = tensor("aw_chunk_359_cast_fp16")]; tensor var_2361_to_fp16 = const()[name = tensor("op_2361_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_361_cast_fp16, y = var_2361_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; tensor var_2363_to_fp16 = const()[name = tensor("op_2363_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_363_cast_fp16, y = var_2363_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; tensor var_2365_to_fp16 = const()[name = tensor("op_2365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_365_cast_fp16, y = var_2365_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; tensor var_2367_to_fp16 = const()[name = tensor("op_2367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_367_cast_fp16, y = var_2367_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; tensor var_2369_to_fp16 = const()[name = tensor("op_2369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_369_cast_fp16, y = var_2369_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; tensor var_2371_to_fp16 = const()[name = tensor("op_2371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_371_cast_fp16, y = var_2371_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; tensor var_2373_to_fp16 = const()[name = tensor("op_2373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_373_cast_fp16, y = var_2373_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; tensor var_2375_to_fp16 = const()[name = tensor("op_2375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_375_cast_fp16, y = var_2375_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; tensor var_2377_to_fp16 = const()[name = tensor("op_2377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_377_cast_fp16, y = var_2377_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; tensor var_2379_to_fp16 = const()[name = tensor("op_2379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_379_cast_fp16, y = var_2379_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; tensor var_2381_to_fp16 = const()[name = tensor("op_2381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_381_cast_fp16, y = var_2381_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; tensor var_2383_to_fp16 = const()[name = tensor("op_2383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_383_cast_fp16, y = var_2383_to_fp16)[name = tensor("aw_chunk_383_cast_fp16")]; tensor var_2385_to_fp16 = const()[name = tensor("op_2385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_385_cast_fp16, y = var_2385_to_fp16)[name = tensor("aw_chunk_385_cast_fp16")]; tensor var_2387_to_fp16 = const()[name = tensor("op_2387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_387_cast_fp16, y = var_2387_to_fp16)[name = tensor("aw_chunk_387_cast_fp16")]; tensor var_2389_to_fp16 = const()[name = tensor("op_2389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_389_cast_fp16, y = var_2389_to_fp16)[name = tensor("aw_chunk_389_cast_fp16")]; tensor var_2391_to_fp16 = const()[name = tensor("op_2391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_391_cast_fp16, y = var_2391_to_fp16)[name = tensor("aw_chunk_391_cast_fp16")]; tensor var_2393_to_fp16 = const()[name = tensor("op_2393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_393_cast_fp16, y = var_2393_to_fp16)[name = tensor("aw_chunk_393_cast_fp16")]; tensor var_2395_to_fp16 = const()[name = tensor("op_2395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_395_cast_fp16, y = var_2395_to_fp16)[name = tensor("aw_chunk_395_cast_fp16")]; tensor var_2397_to_fp16 = const()[name = tensor("op_2397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_397_cast_fp16, y = var_2397_to_fp16)[name = tensor("aw_chunk_397_cast_fp16")]; tensor var_2399_to_fp16 = const()[name = tensor("op_2399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_399_cast_fp16, y = var_2399_to_fp16)[name = tensor("aw_chunk_399_cast_fp16")]; tensor var_2401_to_fp16 = const()[name = tensor("op_2401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_401_cast_fp16, y = var_2401_to_fp16)[name = tensor("aw_chunk_401_cast_fp16")]; tensor var_2403_to_fp16 = const()[name = tensor("op_2403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_403_cast_fp16, y = var_2403_to_fp16)[name = tensor("aw_chunk_403_cast_fp16")]; tensor var_2405_to_fp16 = const()[name = tensor("op_2405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_405_cast_fp16, y = var_2405_to_fp16)[name = tensor("aw_chunk_405_cast_fp16")]; tensor var_2407_to_fp16 = const()[name = tensor("op_2407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_407_cast_fp16, y = var_2407_to_fp16)[name = tensor("aw_chunk_407_cast_fp16")]; tensor var_2409_to_fp16 = const()[name = tensor("op_2409_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_409_cast_fp16, y = var_2409_to_fp16)[name = tensor("aw_chunk_409_cast_fp16")]; tensor var_2411_to_fp16 = const()[name = tensor("op_2411_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_411_cast_fp16, y = var_2411_to_fp16)[name = tensor("aw_chunk_411_cast_fp16")]; tensor var_2413_to_fp16 = const()[name = tensor("op_2413_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_413_cast_fp16, y = var_2413_to_fp16)[name = tensor("aw_chunk_413_cast_fp16")]; tensor var_2415_to_fp16 = const()[name = tensor("op_2415_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_415_cast_fp16, y = var_2415_to_fp16)[name = tensor("aw_chunk_415_cast_fp16")]; tensor var_2417_to_fp16 = const()[name = tensor("op_2417_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_417_cast_fp16, y = var_2417_to_fp16)[name = tensor("aw_chunk_417_cast_fp16")]; tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_419_cast_fp16, y = var_2419_to_fp16)[name = tensor("aw_chunk_419_cast_fp16")]; tensor var_2421_to_fp16 = const()[name = tensor("op_2421_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_421_cast_fp16, y = var_2421_to_fp16)[name = tensor("aw_chunk_421_cast_fp16")]; tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_423_cast_fp16, y = var_2423_to_fp16)[name = tensor("aw_chunk_423_cast_fp16")]; tensor var_2425_to_fp16 = const()[name = tensor("op_2425_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_425_cast_fp16, y = var_2425_to_fp16)[name = tensor("aw_chunk_425_cast_fp16")]; tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_427_cast_fp16, y = var_2427_to_fp16)[name = tensor("aw_chunk_427_cast_fp16")]; tensor var_2429_to_fp16 = const()[name = tensor("op_2429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_429_cast_fp16, y = var_2429_to_fp16)[name = tensor("aw_chunk_429_cast_fp16")]; tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_431_cast_fp16, y = var_2431_to_fp16)[name = tensor("aw_chunk_431_cast_fp16")]; tensor var_2433_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_289_cast_fp16)[name = tensor("op_2433_cast_fp16")]; tensor var_2434_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_291_cast_fp16)[name = tensor("op_2434_cast_fp16")]; tensor var_2435_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_293_cast_fp16)[name = tensor("op_2435_cast_fp16")]; tensor var_2436_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_295_cast_fp16)[name = tensor("op_2436_cast_fp16")]; tensor var_2437_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_297_cast_fp16)[name = tensor("op_2437_cast_fp16")]; tensor var_2438_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_299_cast_fp16)[name = tensor("op_2438_cast_fp16")]; tensor var_2439_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_301_cast_fp16)[name = tensor("op_2439_cast_fp16")]; tensor var_2440_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_303_cast_fp16)[name = tensor("op_2440_cast_fp16")]; tensor var_2441_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_305_cast_fp16)[name = tensor("op_2441_cast_fp16")]; tensor var_2442_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_307_cast_fp16)[name = tensor("op_2442_cast_fp16")]; tensor var_2443_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_309_cast_fp16)[name = tensor("op_2443_cast_fp16")]; tensor var_2444_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_311_cast_fp16)[name = tensor("op_2444_cast_fp16")]; tensor var_2445_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_313_cast_fp16)[name = tensor("op_2445_cast_fp16")]; tensor var_2446_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_315_cast_fp16)[name = tensor("op_2446_cast_fp16")]; tensor var_2447_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_317_cast_fp16)[name = tensor("op_2447_cast_fp16")]; tensor var_2448_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_319_cast_fp16)[name = tensor("op_2448_cast_fp16")]; tensor var_2449_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_321_cast_fp16)[name = tensor("op_2449_cast_fp16")]; tensor var_2450_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_323_cast_fp16)[name = tensor("op_2450_cast_fp16")]; tensor var_2451_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_325_cast_fp16)[name = tensor("op_2451_cast_fp16")]; tensor var_2452_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_327_cast_fp16)[name = tensor("op_2452_cast_fp16")]; tensor var_2453_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_329_cast_fp16)[name = tensor("op_2453_cast_fp16")]; tensor var_2454_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_331_cast_fp16)[name = tensor("op_2454_cast_fp16")]; tensor var_2455_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_333_cast_fp16)[name = tensor("op_2455_cast_fp16")]; tensor var_2456_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_335_cast_fp16)[name = tensor("op_2456_cast_fp16")]; tensor var_2457_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_337_cast_fp16)[name = tensor("op_2457_cast_fp16")]; tensor var_2458_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_339_cast_fp16)[name = tensor("op_2458_cast_fp16")]; tensor var_2459_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_341_cast_fp16)[name = tensor("op_2459_cast_fp16")]; tensor var_2460_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_343_cast_fp16)[name = tensor("op_2460_cast_fp16")]; tensor var_2461_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_345_cast_fp16)[name = tensor("op_2461_cast_fp16")]; tensor var_2462_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_347_cast_fp16)[name = tensor("op_2462_cast_fp16")]; tensor var_2463_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_349_cast_fp16)[name = tensor("op_2463_cast_fp16")]; tensor var_2464_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_351_cast_fp16)[name = tensor("op_2464_cast_fp16")]; tensor var_2465_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_353_cast_fp16)[name = tensor("op_2465_cast_fp16")]; tensor var_2466_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_355_cast_fp16)[name = tensor("op_2466_cast_fp16")]; tensor var_2467_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_357_cast_fp16)[name = tensor("op_2467_cast_fp16")]; tensor var_2468_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_359_cast_fp16)[name = tensor("op_2468_cast_fp16")]; tensor var_2469_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_361_cast_fp16)[name = tensor("op_2469_cast_fp16")]; tensor var_2470_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_363_cast_fp16)[name = tensor("op_2470_cast_fp16")]; tensor var_2471_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_365_cast_fp16)[name = tensor("op_2471_cast_fp16")]; tensor var_2472_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_367_cast_fp16)[name = tensor("op_2472_cast_fp16")]; tensor var_2473_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_369_cast_fp16)[name = tensor("op_2473_cast_fp16")]; tensor var_2474_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_371_cast_fp16)[name = tensor("op_2474_cast_fp16")]; tensor var_2475_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_373_cast_fp16)[name = tensor("op_2475_cast_fp16")]; tensor var_2476_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_375_cast_fp16)[name = tensor("op_2476_cast_fp16")]; tensor var_2477_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_377_cast_fp16)[name = tensor("op_2477_cast_fp16")]; tensor var_2478_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_379_cast_fp16)[name = tensor("op_2478_cast_fp16")]; tensor var_2479_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_381_cast_fp16)[name = tensor("op_2479_cast_fp16")]; tensor var_2480_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_383_cast_fp16)[name = tensor("op_2480_cast_fp16")]; tensor var_2481_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_385_cast_fp16)[name = tensor("op_2481_cast_fp16")]; tensor var_2482_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_387_cast_fp16)[name = tensor("op_2482_cast_fp16")]; tensor var_2483_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_389_cast_fp16)[name = tensor("op_2483_cast_fp16")]; tensor var_2484_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_391_cast_fp16)[name = tensor("op_2484_cast_fp16")]; tensor var_2485_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_393_cast_fp16)[name = tensor("op_2485_cast_fp16")]; tensor var_2486_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_395_cast_fp16)[name = tensor("op_2486_cast_fp16")]; tensor var_2487_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_397_cast_fp16)[name = tensor("op_2487_cast_fp16")]; tensor var_2488_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_399_cast_fp16)[name = tensor("op_2488_cast_fp16")]; tensor var_2489_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_401_cast_fp16)[name = tensor("op_2489_cast_fp16")]; tensor var_2490_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_403_cast_fp16)[name = tensor("op_2490_cast_fp16")]; tensor var_2491_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_405_cast_fp16)[name = tensor("op_2491_cast_fp16")]; tensor var_2492_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_407_cast_fp16)[name = tensor("op_2492_cast_fp16")]; tensor var_2493_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_409_cast_fp16)[name = tensor("op_2493_cast_fp16")]; tensor var_2494_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_411_cast_fp16)[name = tensor("op_2494_cast_fp16")]; tensor var_2495_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_413_cast_fp16)[name = tensor("op_2495_cast_fp16")]; tensor var_2496_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_415_cast_fp16)[name = tensor("op_2496_cast_fp16")]; tensor var_2497_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_417_cast_fp16)[name = tensor("op_2497_cast_fp16")]; tensor var_2498_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_419_cast_fp16)[name = tensor("op_2498_cast_fp16")]; tensor var_2499_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_421_cast_fp16)[name = tensor("op_2499_cast_fp16")]; tensor var_2500_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_423_cast_fp16)[name = tensor("op_2500_cast_fp16")]; tensor var_2501_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_425_cast_fp16)[name = tensor("op_2501_cast_fp16")]; tensor var_2502_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_427_cast_fp16)[name = tensor("op_2502_cast_fp16")]; tensor var_2503_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_429_cast_fp16)[name = tensor("op_2503_cast_fp16")]; tensor var_2504_cast_fp16 = softmax(axis = var_1877, x = aw_chunk_431_cast_fp16)[name = tensor("op_2504_cast_fp16")]; tensor var_2506_equation_0 = const()[name = tensor("op_2506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2506_cast_fp16 = einsum(equation = var_2506_equation_0, values = (var_2098_cast_fp16, var_2433_cast_fp16))[name = tensor("op_2506_cast_fp16")]; tensor var_2508_equation_0 = const()[name = tensor("op_2508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2508_cast_fp16 = einsum(equation = var_2508_equation_0, values = (var_2098_cast_fp16, var_2434_cast_fp16))[name = tensor("op_2508_cast_fp16")]; tensor var_2510_equation_0 = const()[name = tensor("op_2510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2510_cast_fp16 = einsum(equation = var_2510_equation_0, values = (var_2098_cast_fp16, var_2435_cast_fp16))[name = tensor("op_2510_cast_fp16")]; tensor var_2512_equation_0 = const()[name = tensor("op_2512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2512_cast_fp16 = einsum(equation = var_2512_equation_0, values = (var_2098_cast_fp16, var_2436_cast_fp16))[name = tensor("op_2512_cast_fp16")]; tensor var_2514_equation_0 = const()[name = tensor("op_2514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2514_cast_fp16 = einsum(equation = var_2514_equation_0, values = (var_2098_cast_fp16, var_2437_cast_fp16))[name = tensor("op_2514_cast_fp16")]; tensor var_2516_equation_0 = const()[name = tensor("op_2516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2516_cast_fp16 = einsum(equation = var_2516_equation_0, values = (var_2098_cast_fp16, var_2438_cast_fp16))[name = tensor("op_2516_cast_fp16")]; tensor var_2518_equation_0 = const()[name = tensor("op_2518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2518_cast_fp16 = einsum(equation = var_2518_equation_0, values = (var_2102_cast_fp16, var_2439_cast_fp16))[name = tensor("op_2518_cast_fp16")]; tensor var_2520_equation_0 = const()[name = tensor("op_2520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2520_cast_fp16 = einsum(equation = var_2520_equation_0, values = (var_2102_cast_fp16, var_2440_cast_fp16))[name = tensor("op_2520_cast_fp16")]; tensor var_2522_equation_0 = const()[name = tensor("op_2522_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2522_cast_fp16 = einsum(equation = var_2522_equation_0, values = (var_2102_cast_fp16, var_2441_cast_fp16))[name = tensor("op_2522_cast_fp16")]; tensor var_2524_equation_0 = const()[name = tensor("op_2524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2524_cast_fp16 = einsum(equation = var_2524_equation_0, values = (var_2102_cast_fp16, var_2442_cast_fp16))[name = tensor("op_2524_cast_fp16")]; tensor var_2526_equation_0 = const()[name = tensor("op_2526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2526_cast_fp16 = einsum(equation = var_2526_equation_0, values = (var_2102_cast_fp16, var_2443_cast_fp16))[name = tensor("op_2526_cast_fp16")]; tensor var_2528_equation_0 = const()[name = tensor("op_2528_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2528_cast_fp16 = einsum(equation = var_2528_equation_0, values = (var_2102_cast_fp16, var_2444_cast_fp16))[name = tensor("op_2528_cast_fp16")]; tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2106_cast_fp16, var_2445_cast_fp16))[name = tensor("op_2530_cast_fp16")]; tensor var_2532_equation_0 = const()[name = tensor("op_2532_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2532_cast_fp16 = einsum(equation = var_2532_equation_0, values = (var_2106_cast_fp16, var_2446_cast_fp16))[name = tensor("op_2532_cast_fp16")]; tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2106_cast_fp16, var_2447_cast_fp16))[name = tensor("op_2534_cast_fp16")]; tensor var_2536_equation_0 = const()[name = tensor("op_2536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2536_cast_fp16 = einsum(equation = var_2536_equation_0, values = (var_2106_cast_fp16, var_2448_cast_fp16))[name = tensor("op_2536_cast_fp16")]; tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2106_cast_fp16, var_2449_cast_fp16))[name = tensor("op_2538_cast_fp16")]; tensor var_2540_equation_0 = const()[name = tensor("op_2540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2540_cast_fp16 = einsum(equation = var_2540_equation_0, values = (var_2106_cast_fp16, var_2450_cast_fp16))[name = tensor("op_2540_cast_fp16")]; tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2110_cast_fp16, var_2451_cast_fp16))[name = tensor("op_2542_cast_fp16")]; tensor var_2544_equation_0 = const()[name = tensor("op_2544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2544_cast_fp16 = einsum(equation = var_2544_equation_0, values = (var_2110_cast_fp16, var_2452_cast_fp16))[name = tensor("op_2544_cast_fp16")]; tensor var_2546_equation_0 = const()[name = tensor("op_2546_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2546_cast_fp16 = einsum(equation = var_2546_equation_0, values = (var_2110_cast_fp16, var_2453_cast_fp16))[name = tensor("op_2546_cast_fp16")]; tensor var_2548_equation_0 = const()[name = tensor("op_2548_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2548_cast_fp16 = einsum(equation = var_2548_equation_0, values = (var_2110_cast_fp16, var_2454_cast_fp16))[name = tensor("op_2548_cast_fp16")]; tensor var_2550_equation_0 = const()[name = tensor("op_2550_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2550_cast_fp16 = einsum(equation = var_2550_equation_0, values = (var_2110_cast_fp16, var_2455_cast_fp16))[name = tensor("op_2550_cast_fp16")]; tensor var_2552_equation_0 = const()[name = tensor("op_2552_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2552_cast_fp16 = einsum(equation = var_2552_equation_0, values = (var_2110_cast_fp16, var_2456_cast_fp16))[name = tensor("op_2552_cast_fp16")]; tensor var_2554_equation_0 = const()[name = tensor("op_2554_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2554_cast_fp16 = einsum(equation = var_2554_equation_0, values = (var_2114_cast_fp16, var_2457_cast_fp16))[name = tensor("op_2554_cast_fp16")]; tensor var_2556_equation_0 = const()[name = tensor("op_2556_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2556_cast_fp16 = einsum(equation = var_2556_equation_0, values = (var_2114_cast_fp16, var_2458_cast_fp16))[name = tensor("op_2556_cast_fp16")]; tensor var_2558_equation_0 = const()[name = tensor("op_2558_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2558_cast_fp16 = einsum(equation = var_2558_equation_0, values = (var_2114_cast_fp16, var_2459_cast_fp16))[name = tensor("op_2558_cast_fp16")]; tensor var_2560_equation_0 = const()[name = tensor("op_2560_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2560_cast_fp16 = einsum(equation = var_2560_equation_0, values = (var_2114_cast_fp16, var_2460_cast_fp16))[name = tensor("op_2560_cast_fp16")]; tensor var_2562_equation_0 = const()[name = tensor("op_2562_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2562_cast_fp16 = einsum(equation = var_2562_equation_0, values = (var_2114_cast_fp16, var_2461_cast_fp16))[name = tensor("op_2562_cast_fp16")]; tensor var_2564_equation_0 = const()[name = tensor("op_2564_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2564_cast_fp16 = einsum(equation = var_2564_equation_0, values = (var_2114_cast_fp16, var_2462_cast_fp16))[name = tensor("op_2564_cast_fp16")]; tensor var_2566_equation_0 = const()[name = tensor("op_2566_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2566_cast_fp16 = einsum(equation = var_2566_equation_0, values = (var_2118_cast_fp16, var_2463_cast_fp16))[name = tensor("op_2566_cast_fp16")]; tensor var_2568_equation_0 = const()[name = tensor("op_2568_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2568_cast_fp16 = einsum(equation = var_2568_equation_0, values = (var_2118_cast_fp16, var_2464_cast_fp16))[name = tensor("op_2568_cast_fp16")]; tensor var_2570_equation_0 = const()[name = tensor("op_2570_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2570_cast_fp16 = einsum(equation = var_2570_equation_0, values = (var_2118_cast_fp16, var_2465_cast_fp16))[name = tensor("op_2570_cast_fp16")]; tensor var_2572_equation_0 = const()[name = tensor("op_2572_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2572_cast_fp16 = einsum(equation = var_2572_equation_0, values = (var_2118_cast_fp16, var_2466_cast_fp16))[name = tensor("op_2572_cast_fp16")]; tensor var_2574_equation_0 = const()[name = tensor("op_2574_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2574_cast_fp16 = einsum(equation = var_2574_equation_0, values = (var_2118_cast_fp16, var_2467_cast_fp16))[name = tensor("op_2574_cast_fp16")]; tensor var_2576_equation_0 = const()[name = tensor("op_2576_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2576_cast_fp16 = einsum(equation = var_2576_equation_0, values = (var_2118_cast_fp16, var_2468_cast_fp16))[name = tensor("op_2576_cast_fp16")]; tensor var_2578_equation_0 = const()[name = tensor("op_2578_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2578_cast_fp16 = einsum(equation = var_2578_equation_0, values = (var_2122_cast_fp16, var_2469_cast_fp16))[name = tensor("op_2578_cast_fp16")]; tensor var_2580_equation_0 = const()[name = tensor("op_2580_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2580_cast_fp16 = einsum(equation = var_2580_equation_0, values = (var_2122_cast_fp16, var_2470_cast_fp16))[name = tensor("op_2580_cast_fp16")]; tensor var_2582_equation_0 = const()[name = tensor("op_2582_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2582_cast_fp16 = einsum(equation = var_2582_equation_0, values = (var_2122_cast_fp16, var_2471_cast_fp16))[name = tensor("op_2582_cast_fp16")]; tensor var_2584_equation_0 = const()[name = tensor("op_2584_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2584_cast_fp16 = einsum(equation = var_2584_equation_0, values = (var_2122_cast_fp16, var_2472_cast_fp16))[name = tensor("op_2584_cast_fp16")]; tensor var_2586_equation_0 = const()[name = tensor("op_2586_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2586_cast_fp16 = einsum(equation = var_2586_equation_0, values = (var_2122_cast_fp16, var_2473_cast_fp16))[name = tensor("op_2586_cast_fp16")]; tensor var_2588_equation_0 = const()[name = tensor("op_2588_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2588_cast_fp16 = einsum(equation = var_2588_equation_0, values = (var_2122_cast_fp16, var_2474_cast_fp16))[name = tensor("op_2588_cast_fp16")]; tensor var_2590_equation_0 = const()[name = tensor("op_2590_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2590_cast_fp16 = einsum(equation = var_2590_equation_0, values = (var_2126_cast_fp16, var_2475_cast_fp16))[name = tensor("op_2590_cast_fp16")]; tensor var_2592_equation_0 = const()[name = tensor("op_2592_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2592_cast_fp16 = einsum(equation = var_2592_equation_0, values = (var_2126_cast_fp16, var_2476_cast_fp16))[name = tensor("op_2592_cast_fp16")]; tensor var_2594_equation_0 = const()[name = tensor("op_2594_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2594_cast_fp16 = einsum(equation = var_2594_equation_0, values = (var_2126_cast_fp16, var_2477_cast_fp16))[name = tensor("op_2594_cast_fp16")]; tensor var_2596_equation_0 = const()[name = tensor("op_2596_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2596_cast_fp16 = einsum(equation = var_2596_equation_0, values = (var_2126_cast_fp16, var_2478_cast_fp16))[name = tensor("op_2596_cast_fp16")]; tensor var_2598_equation_0 = const()[name = tensor("op_2598_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2598_cast_fp16 = einsum(equation = var_2598_equation_0, values = (var_2126_cast_fp16, var_2479_cast_fp16))[name = tensor("op_2598_cast_fp16")]; tensor var_2600_equation_0 = const()[name = tensor("op_2600_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2600_cast_fp16 = einsum(equation = var_2600_equation_0, values = (var_2126_cast_fp16, var_2480_cast_fp16))[name = tensor("op_2600_cast_fp16")]; tensor var_2602_equation_0 = const()[name = tensor("op_2602_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2602_cast_fp16 = einsum(equation = var_2602_equation_0, values = (var_2130_cast_fp16, var_2481_cast_fp16))[name = tensor("op_2602_cast_fp16")]; tensor var_2604_equation_0 = const()[name = tensor("op_2604_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2604_cast_fp16 = einsum(equation = var_2604_equation_0, values = (var_2130_cast_fp16, var_2482_cast_fp16))[name = tensor("op_2604_cast_fp16")]; tensor var_2606_equation_0 = const()[name = tensor("op_2606_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2606_cast_fp16 = einsum(equation = var_2606_equation_0, values = (var_2130_cast_fp16, var_2483_cast_fp16))[name = tensor("op_2606_cast_fp16")]; tensor var_2608_equation_0 = const()[name = tensor("op_2608_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2608_cast_fp16 = einsum(equation = var_2608_equation_0, values = (var_2130_cast_fp16, var_2484_cast_fp16))[name = tensor("op_2608_cast_fp16")]; tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2130_cast_fp16, var_2485_cast_fp16))[name = tensor("op_2610_cast_fp16")]; tensor var_2612_equation_0 = const()[name = tensor("op_2612_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2612_cast_fp16 = einsum(equation = var_2612_equation_0, values = (var_2130_cast_fp16, var_2486_cast_fp16))[name = tensor("op_2612_cast_fp16")]; tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2134_cast_fp16, var_2487_cast_fp16))[name = tensor("op_2614_cast_fp16")]; tensor var_2616_equation_0 = const()[name = tensor("op_2616_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2616_cast_fp16 = einsum(equation = var_2616_equation_0, values = (var_2134_cast_fp16, var_2488_cast_fp16))[name = tensor("op_2616_cast_fp16")]; tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2134_cast_fp16, var_2489_cast_fp16))[name = tensor("op_2618_cast_fp16")]; tensor var_2620_equation_0 = const()[name = tensor("op_2620_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2620_cast_fp16 = einsum(equation = var_2620_equation_0, values = (var_2134_cast_fp16, var_2490_cast_fp16))[name = tensor("op_2620_cast_fp16")]; tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2134_cast_fp16, var_2491_cast_fp16))[name = tensor("op_2622_cast_fp16")]; tensor var_2624_equation_0 = const()[name = tensor("op_2624_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2624_cast_fp16 = einsum(equation = var_2624_equation_0, values = (var_2134_cast_fp16, var_2492_cast_fp16))[name = tensor("op_2624_cast_fp16")]; tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2138_cast_fp16, var_2493_cast_fp16))[name = tensor("op_2626_cast_fp16")]; tensor var_2628_equation_0 = const()[name = tensor("op_2628_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2628_cast_fp16 = einsum(equation = var_2628_equation_0, values = (var_2138_cast_fp16, var_2494_cast_fp16))[name = tensor("op_2628_cast_fp16")]; tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2138_cast_fp16, var_2495_cast_fp16))[name = tensor("op_2630_cast_fp16")]; tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2632_cast_fp16 = einsum(equation = var_2632_equation_0, values = (var_2138_cast_fp16, var_2496_cast_fp16))[name = tensor("op_2632_cast_fp16")]; tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2138_cast_fp16, var_2497_cast_fp16))[name = tensor("op_2634_cast_fp16")]; tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2636_cast_fp16 = einsum(equation = var_2636_equation_0, values = (var_2138_cast_fp16, var_2498_cast_fp16))[name = tensor("op_2636_cast_fp16")]; tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2142_cast_fp16, var_2499_cast_fp16))[name = tensor("op_2638_cast_fp16")]; tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2640_cast_fp16 = einsum(equation = var_2640_equation_0, values = (var_2142_cast_fp16, var_2500_cast_fp16))[name = tensor("op_2640_cast_fp16")]; tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2142_cast_fp16, var_2501_cast_fp16))[name = tensor("op_2642_cast_fp16")]; tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2644_cast_fp16 = einsum(equation = var_2644_equation_0, values = (var_2142_cast_fp16, var_2502_cast_fp16))[name = tensor("op_2644_cast_fp16")]; tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2142_cast_fp16, var_2503_cast_fp16))[name = tensor("op_2646_cast_fp16")]; tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2648_cast_fp16 = einsum(equation = var_2648_equation_0, values = (var_2142_cast_fp16, var_2504_cast_fp16))[name = tensor("op_2648_cast_fp16")]; tensor var_2650_interleave_0 = const()[name = tensor("op_2650_interleave_0"), val = tensor(false)]; tensor var_2650_cast_fp16 = concat(axis = var_1861, interleave = var_2650_interleave_0, values = (var_2506_cast_fp16, var_2508_cast_fp16, var_2510_cast_fp16, var_2512_cast_fp16, var_2514_cast_fp16, var_2516_cast_fp16))[name = tensor("op_2650_cast_fp16")]; tensor var_2652_interleave_0 = const()[name = tensor("op_2652_interleave_0"), val = tensor(false)]; tensor var_2652_cast_fp16 = concat(axis = var_1861, interleave = var_2652_interleave_0, values = (var_2518_cast_fp16, var_2520_cast_fp16, var_2522_cast_fp16, var_2524_cast_fp16, var_2526_cast_fp16, var_2528_cast_fp16))[name = tensor("op_2652_cast_fp16")]; tensor var_2654_interleave_0 = const()[name = tensor("op_2654_interleave_0"), val = tensor(false)]; tensor var_2654_cast_fp16 = concat(axis = var_1861, interleave = var_2654_interleave_0, values = (var_2530_cast_fp16, var_2532_cast_fp16, var_2534_cast_fp16, var_2536_cast_fp16, var_2538_cast_fp16, var_2540_cast_fp16))[name = tensor("op_2654_cast_fp16")]; tensor var_2656_interleave_0 = const()[name = tensor("op_2656_interleave_0"), val = tensor(false)]; tensor var_2656_cast_fp16 = concat(axis = var_1861, interleave = var_2656_interleave_0, values = (var_2542_cast_fp16, var_2544_cast_fp16, var_2546_cast_fp16, var_2548_cast_fp16, var_2550_cast_fp16, var_2552_cast_fp16))[name = tensor("op_2656_cast_fp16")]; tensor var_2658_interleave_0 = const()[name = tensor("op_2658_interleave_0"), val = tensor(false)]; tensor var_2658_cast_fp16 = concat(axis = var_1861, interleave = var_2658_interleave_0, values = (var_2554_cast_fp16, var_2556_cast_fp16, var_2558_cast_fp16, var_2560_cast_fp16, var_2562_cast_fp16, var_2564_cast_fp16))[name = tensor("op_2658_cast_fp16")]; tensor var_2660_interleave_0 = const()[name = tensor("op_2660_interleave_0"), val = tensor(false)]; tensor var_2660_cast_fp16 = concat(axis = var_1861, interleave = var_2660_interleave_0, values = (var_2566_cast_fp16, var_2568_cast_fp16, var_2570_cast_fp16, var_2572_cast_fp16, var_2574_cast_fp16, var_2576_cast_fp16))[name = tensor("op_2660_cast_fp16")]; tensor var_2662_interleave_0 = const()[name = tensor("op_2662_interleave_0"), val = tensor(false)]; tensor var_2662_cast_fp16 = concat(axis = var_1861, interleave = var_2662_interleave_0, values = (var_2578_cast_fp16, var_2580_cast_fp16, var_2582_cast_fp16, var_2584_cast_fp16, var_2586_cast_fp16, var_2588_cast_fp16))[name = tensor("op_2662_cast_fp16")]; tensor var_2664_interleave_0 = const()[name = tensor("op_2664_interleave_0"), val = tensor(false)]; tensor var_2664_cast_fp16 = concat(axis = var_1861, interleave = var_2664_interleave_0, values = (var_2590_cast_fp16, var_2592_cast_fp16, var_2594_cast_fp16, var_2596_cast_fp16, var_2598_cast_fp16, var_2600_cast_fp16))[name = tensor("op_2664_cast_fp16")]; tensor var_2666_interleave_0 = const()[name = tensor("op_2666_interleave_0"), val = tensor(false)]; tensor var_2666_cast_fp16 = concat(axis = var_1861, interleave = var_2666_interleave_0, values = (var_2602_cast_fp16, var_2604_cast_fp16, var_2606_cast_fp16, var_2608_cast_fp16, var_2610_cast_fp16, var_2612_cast_fp16))[name = tensor("op_2666_cast_fp16")]; tensor var_2668_interleave_0 = const()[name = tensor("op_2668_interleave_0"), val = tensor(false)]; tensor var_2668_cast_fp16 = concat(axis = var_1861, interleave = var_2668_interleave_0, values = (var_2614_cast_fp16, var_2616_cast_fp16, var_2618_cast_fp16, var_2620_cast_fp16, var_2622_cast_fp16, var_2624_cast_fp16))[name = tensor("op_2668_cast_fp16")]; tensor var_2670_interleave_0 = const()[name = tensor("op_2670_interleave_0"), val = tensor(false)]; tensor var_2670_cast_fp16 = concat(axis = var_1861, interleave = var_2670_interleave_0, values = (var_2626_cast_fp16, var_2628_cast_fp16, var_2630_cast_fp16, var_2632_cast_fp16, var_2634_cast_fp16, var_2636_cast_fp16))[name = tensor("op_2670_cast_fp16")]; tensor var_2672_interleave_0 = const()[name = tensor("op_2672_interleave_0"), val = tensor(false)]; tensor var_2672_cast_fp16 = concat(axis = var_1861, interleave = var_2672_interleave_0, values = (var_2638_cast_fp16, var_2640_cast_fp16, var_2642_cast_fp16, var_2644_cast_fp16, var_2646_cast_fp16, var_2648_cast_fp16))[name = tensor("op_2672_cast_fp16")]; tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; tensor input_17_cast_fp16 = concat(axis = var_1877, interleave = input_17_interleave_0, values = (var_2650_cast_fp16, var_2652_cast_fp16, var_2654_cast_fp16, var_2656_cast_fp16, var_2658_cast_fp16, var_2660_cast_fp16, var_2662_cast_fp16, var_2664_cast_fp16, var_2666_cast_fp16, var_2668_cast_fp16, var_2670_cast_fp16, var_2672_cast_fp16))[name = tensor("input_17_cast_fp16")]; tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("valid")]; tensor obj_11_strides_0 = const()[name = tensor("obj_11_strides_0"), val = tensor([1, 1])]; tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_11_dilations_0 = const()[name = tensor("obj_11_dilations_0"), val = tensor([1, 1])]; tensor obj_11_groups_0 = const()[name = tensor("obj_11_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38114112)))]; tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39293824)))]; tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; tensor var_2691_to_fp16 = const()[name = tensor("op_2691_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_2691_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39295424)))]; tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39297024)))]; tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("valid")]; tensor input_21_strides_0 = const()[name = tensor("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = tensor("input_21_dilations_0"), val = tensor([1, 1])]; tensor input_21_groups_0 = const()[name = tensor("input_21_groups_0"), val = tensor(1)]; tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39298624)))]; tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44017280)))]; tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("valid")]; tensor hidden_states_9_strides_0 = const()[name = tensor("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = tensor("hidden_states_9_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_9_groups_0 = const()[name = tensor("hidden_states_9_groups_0"), val = tensor(1)]; tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44023488)))]; tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48742144)))]; tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; tensor var_2723 = const()[name = tensor("op_2723"), val = tensor(3)]; tensor var_2739 = const()[name = tensor("op_2739"), val = tensor(1)]; tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; tensor var_2756_to_fp16 = const()[name = tensor("op_2756_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_2756_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48743744)))]; tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48745344)))]; tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("valid")]; tensor query_7_strides_0 = const()[name = tensor("query_7_strides_0"), val = tensor([1, 1])]; tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_7_dilations_0 = const()[name = tensor("query_7_dilations_0"), val = tensor([1, 1])]; tensor query_7_groups_0 = const()[name = tensor("query_7_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48746944)))]; tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49926656)))]; tensor query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_7_dilations_0, groups = query_7_groups_0, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = query_7_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_7_cast_fp16")]; tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("valid")]; tensor key_7_strides_0 = const()[name = tensor("key_7_strides_0"), val = tensor([1, 1])]; tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_7_dilations_0 = const()[name = tensor("key_7_dilations_0"), val = tensor([1, 1])]; tensor key_7_groups_0 = const()[name = tensor("key_7_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49928256)))]; tensor key_7_cast_fp16 = conv(dilations = key_7_dilations_0, groups = key_7_groups_0, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = key_7_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_7_cast_fp16")]; tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("valid")]; tensor value_7_strides_0 = const()[name = tensor("value_7_strides_0"), val = tensor([1, 1])]; tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_7_dilations_0 = const()[name = tensor("value_7_dilations_0"), val = tensor([1, 1])]; tensor value_7_groups_0 = const()[name = tensor("value_7_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51107968)))]; tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52287680)))]; tensor value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = value_7_dilations_0, groups = value_7_groups_0, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = value_7_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_7_cast_fp16")]; tensor var_2791_begin_0 = const()[name = tensor("op_2791_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2791_end_0 = const()[name = tensor("op_2791_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2791_end_mask_0 = const()[name = tensor("op_2791_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2791_cast_fp16 = slice_by_index(begin = var_2791_begin_0, end = var_2791_end_0, end_mask = var_2791_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2791_cast_fp16")]; tensor var_2795_begin_0 = const()[name = tensor("op_2795_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2795_end_0 = const()[name = tensor("op_2795_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2795_end_mask_0 = const()[name = tensor("op_2795_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2795_cast_fp16 = slice_by_index(begin = var_2795_begin_0, end = var_2795_end_0, end_mask = var_2795_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2795_cast_fp16")]; tensor var_2799_begin_0 = const()[name = tensor("op_2799_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2799_end_0 = const()[name = tensor("op_2799_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2799_end_mask_0 = const()[name = tensor("op_2799_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2799_cast_fp16 = slice_by_index(begin = var_2799_begin_0, end = var_2799_end_0, end_mask = var_2799_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2799_cast_fp16")]; tensor var_2803_begin_0 = const()[name = tensor("op_2803_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2803_end_0 = const()[name = tensor("op_2803_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2803_end_mask_0 = const()[name = tensor("op_2803_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2803_cast_fp16 = slice_by_index(begin = var_2803_begin_0, end = var_2803_end_0, end_mask = var_2803_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2803_cast_fp16")]; tensor var_2807_begin_0 = const()[name = tensor("op_2807_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2807_end_0 = const()[name = tensor("op_2807_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2807_end_mask_0 = const()[name = tensor("op_2807_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2807_cast_fp16 = slice_by_index(begin = var_2807_begin_0, end = var_2807_end_0, end_mask = var_2807_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2807_cast_fp16")]; tensor var_2811_begin_0 = const()[name = tensor("op_2811_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2811_end_0 = const()[name = tensor("op_2811_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2811_end_mask_0 = const()[name = tensor("op_2811_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2811_cast_fp16 = slice_by_index(begin = var_2811_begin_0, end = var_2811_end_0, end_mask = var_2811_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2811_cast_fp16")]; tensor var_2815_begin_0 = const()[name = tensor("op_2815_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2815_end_0 = const()[name = tensor("op_2815_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2815_end_mask_0 = const()[name = tensor("op_2815_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2815_cast_fp16 = slice_by_index(begin = var_2815_begin_0, end = var_2815_end_0, end_mask = var_2815_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2815_cast_fp16")]; tensor var_2819_begin_0 = const()[name = tensor("op_2819_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2819_end_0 = const()[name = tensor("op_2819_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_2819_end_mask_0 = const()[name = tensor("op_2819_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2819_cast_fp16 = slice_by_index(begin = var_2819_begin_0, end = var_2819_end_0, end_mask = var_2819_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2819_cast_fp16")]; tensor var_2823_begin_0 = const()[name = tensor("op_2823_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_2823_end_0 = const()[name = tensor("op_2823_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_2823_end_mask_0 = const()[name = tensor("op_2823_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2823_cast_fp16 = slice_by_index(begin = var_2823_begin_0, end = var_2823_end_0, end_mask = var_2823_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2823_cast_fp16")]; tensor var_2827_begin_0 = const()[name = tensor("op_2827_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_2827_end_0 = const()[name = tensor("op_2827_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_2827_end_mask_0 = const()[name = tensor("op_2827_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2827_cast_fp16 = slice_by_index(begin = var_2827_begin_0, end = var_2827_end_0, end_mask = var_2827_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2827_cast_fp16")]; tensor var_2831_begin_0 = const()[name = tensor("op_2831_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_2831_end_0 = const()[name = tensor("op_2831_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_2831_end_mask_0 = const()[name = tensor("op_2831_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2831_cast_fp16 = slice_by_index(begin = var_2831_begin_0, end = var_2831_end_0, end_mask = var_2831_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2831_cast_fp16")]; tensor var_2835_begin_0 = const()[name = tensor("op_2835_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_2835_end_0 = const()[name = tensor("op_2835_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_2835_end_mask_0 = const()[name = tensor("op_2835_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2835_cast_fp16 = slice_by_index(begin = var_2835_begin_0, end = var_2835_end_0, end_mask = var_2835_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_2835_cast_fp16")]; tensor var_2838_begin_0 = const()[name = tensor("op_2838_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2838_end_0 = const()[name = tensor("op_2838_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2838_end_mask_0 = const()[name = tensor("op_2838_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2838_cast_fp16 = slice_by_index(begin = var_2838_begin_0, end = var_2838_end_0, end_mask = var_2838_end_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2838_cast_fp16")]; tensor var_2839_begin_0 = const()[name = tensor("op_2839_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2839_end_0 = const()[name = tensor("op_2839_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2839_end_mask_0 = const()[name = tensor("op_2839_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2839_cast_fp16 = slice_by_index(begin = var_2839_begin_0, end = var_2839_end_0, end_mask = var_2839_end_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2839_cast_fp16")]; tensor var_2840_begin_0 = const()[name = tensor("op_2840_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2840_end_0 = const()[name = tensor("op_2840_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2840_end_mask_0 = const()[name = tensor("op_2840_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2840_cast_fp16 = slice_by_index(begin = var_2840_begin_0, end = var_2840_end_0, end_mask = var_2840_end_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2840_cast_fp16")]; tensor var_2841_begin_0 = const()[name = tensor("op_2841_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2841_end_0 = const()[name = tensor("op_2841_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2841_end_mask_0 = const()[name = tensor("op_2841_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2841_cast_fp16 = slice_by_index(begin = var_2841_begin_0, end = var_2841_end_0, end_mask = var_2841_end_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2841_cast_fp16")]; tensor var_2842_begin_0 = const()[name = tensor("op_2842_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2842_end_0 = const()[name = tensor("op_2842_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2842_end_mask_0 = const()[name = tensor("op_2842_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2842_cast_fp16 = slice_by_index(begin = var_2842_begin_0, end = var_2842_end_0, end_mask = var_2842_end_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2842_cast_fp16")]; tensor var_2843_begin_0 = const()[name = tensor("op_2843_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2843_end_0 = const()[name = tensor("op_2843_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2843_end_mask_0 = const()[name = tensor("op_2843_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2843_cast_fp16 = slice_by_index(begin = var_2843_begin_0, end = var_2843_end_0, end_mask = var_2843_end_mask_0, x = var_2791_cast_fp16)[name = tensor("op_2843_cast_fp16")]; tensor var_2844_begin_0 = const()[name = tensor("op_2844_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2844_end_0 = const()[name = tensor("op_2844_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2844_end_mask_0 = const()[name = tensor("op_2844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2844_cast_fp16 = slice_by_index(begin = var_2844_begin_0, end = var_2844_end_0, end_mask = var_2844_end_mask_0, x = var_2795_cast_fp16)[name = tensor("op_2844_cast_fp16")]; tensor var_2845_begin_0 = const()[name = tensor("op_2845_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2845_end_0 = const()[name = tensor("op_2845_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2845_end_mask_0 = const()[name = tensor("op_2845_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2845_cast_fp16 = slice_by_index(begin = var_2845_begin_0, end = var_2845_end_0, end_mask = var_2845_end_mask_0, x = var_2795_cast_fp16)[name = tensor("op_2845_cast_fp16")]; tensor var_2846_begin_0 = const()[name = tensor("op_2846_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2846_end_0 = const()[name = tensor("op_2846_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2846_end_mask_0 = const()[name = tensor("op_2846_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2846_cast_fp16 = slice_by_index(begin = var_2846_begin_0, end = var_2846_end_0, end_mask = var_2846_end_mask_0, x = var_2795_cast_fp16)[name = tensor("op_2846_cast_fp16")]; tensor var_2847_begin_0 = const()[name = tensor("op_2847_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2847_end_0 = const()[name = tensor("op_2847_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2847_end_mask_0 = const()[name = tensor("op_2847_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2847_cast_fp16 = slice_by_index(begin = var_2847_begin_0, end = var_2847_end_0, end_mask = var_2847_end_mask_0, x = var_2795_cast_fp16)[name = tensor("op_2847_cast_fp16")]; tensor var_2848_begin_0 = const()[name = tensor("op_2848_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2848_end_0 = const()[name = tensor("op_2848_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2848_end_mask_0 = const()[name = tensor("op_2848_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2848_cast_fp16 = slice_by_index(begin = var_2848_begin_0, end = var_2848_end_0, end_mask = var_2848_end_mask_0, x = var_2795_cast_fp16)[name = tensor("op_2848_cast_fp16")]; tensor var_2849_begin_0 = const()[name = tensor("op_2849_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2849_end_0 = const()[name = tensor("op_2849_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2849_end_mask_0 = const()[name = tensor("op_2849_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2849_cast_fp16 = slice_by_index(begin = var_2849_begin_0, end = var_2849_end_0, end_mask = var_2849_end_mask_0, x = var_2795_cast_fp16)[name = tensor("op_2849_cast_fp16")]; tensor var_2850_begin_0 = const()[name = tensor("op_2850_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2850_end_0 = const()[name = tensor("op_2850_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2850_end_mask_0 = const()[name = tensor("op_2850_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2850_cast_fp16 = slice_by_index(begin = var_2850_begin_0, end = var_2850_end_0, end_mask = var_2850_end_mask_0, x = var_2799_cast_fp16)[name = tensor("op_2850_cast_fp16")]; tensor var_2851_begin_0 = const()[name = tensor("op_2851_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2851_end_0 = const()[name = tensor("op_2851_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2851_end_mask_0 = const()[name = tensor("op_2851_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2851_cast_fp16 = slice_by_index(begin = var_2851_begin_0, end = var_2851_end_0, end_mask = var_2851_end_mask_0, x = var_2799_cast_fp16)[name = tensor("op_2851_cast_fp16")]; tensor var_2852_begin_0 = const()[name = tensor("op_2852_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2852_end_0 = const()[name = tensor("op_2852_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2852_end_mask_0 = const()[name = tensor("op_2852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2852_cast_fp16 = slice_by_index(begin = var_2852_begin_0, end = var_2852_end_0, end_mask = var_2852_end_mask_0, x = var_2799_cast_fp16)[name = tensor("op_2852_cast_fp16")]; tensor var_2853_begin_0 = const()[name = tensor("op_2853_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2853_end_0 = const()[name = tensor("op_2853_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2853_end_mask_0 = const()[name = tensor("op_2853_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2853_cast_fp16 = slice_by_index(begin = var_2853_begin_0, end = var_2853_end_0, end_mask = var_2853_end_mask_0, x = var_2799_cast_fp16)[name = tensor("op_2853_cast_fp16")]; tensor var_2854_begin_0 = const()[name = tensor("op_2854_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2854_end_0 = const()[name = tensor("op_2854_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2854_end_mask_0 = const()[name = tensor("op_2854_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2854_cast_fp16 = slice_by_index(begin = var_2854_begin_0, end = var_2854_end_0, end_mask = var_2854_end_mask_0, x = var_2799_cast_fp16)[name = tensor("op_2854_cast_fp16")]; tensor var_2855_begin_0 = const()[name = tensor("op_2855_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2855_end_0 = const()[name = tensor("op_2855_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2855_end_mask_0 = const()[name = tensor("op_2855_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2855_cast_fp16 = slice_by_index(begin = var_2855_begin_0, end = var_2855_end_0, end_mask = var_2855_end_mask_0, x = var_2799_cast_fp16)[name = tensor("op_2855_cast_fp16")]; tensor var_2856_begin_0 = const()[name = tensor("op_2856_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2856_end_0 = const()[name = tensor("op_2856_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2856_end_mask_0 = const()[name = tensor("op_2856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2856_cast_fp16 = slice_by_index(begin = var_2856_begin_0, end = var_2856_end_0, end_mask = var_2856_end_mask_0, x = var_2803_cast_fp16)[name = tensor("op_2856_cast_fp16")]; tensor var_2857_begin_0 = const()[name = tensor("op_2857_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2857_end_0 = const()[name = tensor("op_2857_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2857_end_mask_0 = const()[name = tensor("op_2857_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2857_cast_fp16 = slice_by_index(begin = var_2857_begin_0, end = var_2857_end_0, end_mask = var_2857_end_mask_0, x = var_2803_cast_fp16)[name = tensor("op_2857_cast_fp16")]; tensor var_2858_begin_0 = const()[name = tensor("op_2858_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2858_end_0 = const()[name = tensor("op_2858_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2858_end_mask_0 = const()[name = tensor("op_2858_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2858_cast_fp16 = slice_by_index(begin = var_2858_begin_0, end = var_2858_end_0, end_mask = var_2858_end_mask_0, x = var_2803_cast_fp16)[name = tensor("op_2858_cast_fp16")]; tensor var_2859_begin_0 = const()[name = tensor("op_2859_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2859_end_0 = const()[name = tensor("op_2859_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2859_end_mask_0 = const()[name = tensor("op_2859_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2859_cast_fp16 = slice_by_index(begin = var_2859_begin_0, end = var_2859_end_0, end_mask = var_2859_end_mask_0, x = var_2803_cast_fp16)[name = tensor("op_2859_cast_fp16")]; tensor var_2860_begin_0 = const()[name = tensor("op_2860_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2860_end_0 = const()[name = tensor("op_2860_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2860_end_mask_0 = const()[name = tensor("op_2860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2860_cast_fp16 = slice_by_index(begin = var_2860_begin_0, end = var_2860_end_0, end_mask = var_2860_end_mask_0, x = var_2803_cast_fp16)[name = tensor("op_2860_cast_fp16")]; tensor var_2861_begin_0 = const()[name = tensor("op_2861_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2861_end_0 = const()[name = tensor("op_2861_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2861_end_mask_0 = const()[name = tensor("op_2861_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2861_cast_fp16 = slice_by_index(begin = var_2861_begin_0, end = var_2861_end_0, end_mask = var_2861_end_mask_0, x = var_2803_cast_fp16)[name = tensor("op_2861_cast_fp16")]; tensor var_2862_begin_0 = const()[name = tensor("op_2862_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2862_end_0 = const()[name = tensor("op_2862_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2862_end_mask_0 = const()[name = tensor("op_2862_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2862_cast_fp16 = slice_by_index(begin = var_2862_begin_0, end = var_2862_end_0, end_mask = var_2862_end_mask_0, x = var_2807_cast_fp16)[name = tensor("op_2862_cast_fp16")]; tensor var_2863_begin_0 = const()[name = tensor("op_2863_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2863_end_0 = const()[name = tensor("op_2863_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2863_end_mask_0 = const()[name = tensor("op_2863_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2863_cast_fp16 = slice_by_index(begin = var_2863_begin_0, end = var_2863_end_0, end_mask = var_2863_end_mask_0, x = var_2807_cast_fp16)[name = tensor("op_2863_cast_fp16")]; tensor var_2864_begin_0 = const()[name = tensor("op_2864_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2864_end_0 = const()[name = tensor("op_2864_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2864_end_mask_0 = const()[name = tensor("op_2864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2864_cast_fp16 = slice_by_index(begin = var_2864_begin_0, end = var_2864_end_0, end_mask = var_2864_end_mask_0, x = var_2807_cast_fp16)[name = tensor("op_2864_cast_fp16")]; tensor var_2865_begin_0 = const()[name = tensor("op_2865_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2865_end_0 = const()[name = tensor("op_2865_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2865_end_mask_0 = const()[name = tensor("op_2865_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2865_cast_fp16 = slice_by_index(begin = var_2865_begin_0, end = var_2865_end_0, end_mask = var_2865_end_mask_0, x = var_2807_cast_fp16)[name = tensor("op_2865_cast_fp16")]; tensor var_2866_begin_0 = const()[name = tensor("op_2866_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2866_end_0 = const()[name = tensor("op_2866_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2866_end_mask_0 = const()[name = tensor("op_2866_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2866_cast_fp16 = slice_by_index(begin = var_2866_begin_0, end = var_2866_end_0, end_mask = var_2866_end_mask_0, x = var_2807_cast_fp16)[name = tensor("op_2866_cast_fp16")]; tensor var_2867_begin_0 = const()[name = tensor("op_2867_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2867_end_0 = const()[name = tensor("op_2867_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2867_end_mask_0 = const()[name = tensor("op_2867_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2867_cast_fp16 = slice_by_index(begin = var_2867_begin_0, end = var_2867_end_0, end_mask = var_2867_end_mask_0, x = var_2807_cast_fp16)[name = tensor("op_2867_cast_fp16")]; tensor var_2868_begin_0 = const()[name = tensor("op_2868_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2868_end_0 = const()[name = tensor("op_2868_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2868_end_mask_0 = const()[name = tensor("op_2868_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2868_cast_fp16 = slice_by_index(begin = var_2868_begin_0, end = var_2868_end_0, end_mask = var_2868_end_mask_0, x = var_2811_cast_fp16)[name = tensor("op_2868_cast_fp16")]; tensor var_2869_begin_0 = const()[name = tensor("op_2869_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2869_end_0 = const()[name = tensor("op_2869_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2869_end_mask_0 = const()[name = tensor("op_2869_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2869_cast_fp16 = slice_by_index(begin = var_2869_begin_0, end = var_2869_end_0, end_mask = var_2869_end_mask_0, x = var_2811_cast_fp16)[name = tensor("op_2869_cast_fp16")]; tensor var_2870_begin_0 = const()[name = tensor("op_2870_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2870_end_0 = const()[name = tensor("op_2870_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2870_end_mask_0 = const()[name = tensor("op_2870_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2870_cast_fp16 = slice_by_index(begin = var_2870_begin_0, end = var_2870_end_0, end_mask = var_2870_end_mask_0, x = var_2811_cast_fp16)[name = tensor("op_2870_cast_fp16")]; tensor var_2871_begin_0 = const()[name = tensor("op_2871_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2871_end_0 = const()[name = tensor("op_2871_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2871_end_mask_0 = const()[name = tensor("op_2871_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2871_cast_fp16 = slice_by_index(begin = var_2871_begin_0, end = var_2871_end_0, end_mask = var_2871_end_mask_0, x = var_2811_cast_fp16)[name = tensor("op_2871_cast_fp16")]; tensor var_2872_begin_0 = const()[name = tensor("op_2872_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2872_end_0 = const()[name = tensor("op_2872_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2872_end_mask_0 = const()[name = tensor("op_2872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2872_cast_fp16 = slice_by_index(begin = var_2872_begin_0, end = var_2872_end_0, end_mask = var_2872_end_mask_0, x = var_2811_cast_fp16)[name = tensor("op_2872_cast_fp16")]; tensor var_2873_begin_0 = const()[name = tensor("op_2873_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2873_end_0 = const()[name = tensor("op_2873_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2873_end_mask_0 = const()[name = tensor("op_2873_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2873_cast_fp16 = slice_by_index(begin = var_2873_begin_0, end = var_2873_end_0, end_mask = var_2873_end_mask_0, x = var_2811_cast_fp16)[name = tensor("op_2873_cast_fp16")]; tensor var_2874_begin_0 = const()[name = tensor("op_2874_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2874_end_0 = const()[name = tensor("op_2874_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2874_end_mask_0 = const()[name = tensor("op_2874_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2874_cast_fp16 = slice_by_index(begin = var_2874_begin_0, end = var_2874_end_0, end_mask = var_2874_end_mask_0, x = var_2815_cast_fp16)[name = tensor("op_2874_cast_fp16")]; tensor var_2875_begin_0 = const()[name = tensor("op_2875_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2875_end_0 = const()[name = tensor("op_2875_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2875_end_mask_0 = const()[name = tensor("op_2875_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2875_cast_fp16 = slice_by_index(begin = var_2875_begin_0, end = var_2875_end_0, end_mask = var_2875_end_mask_0, x = var_2815_cast_fp16)[name = tensor("op_2875_cast_fp16")]; tensor var_2876_begin_0 = const()[name = tensor("op_2876_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2876_end_0 = const()[name = tensor("op_2876_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2876_end_mask_0 = const()[name = tensor("op_2876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2876_cast_fp16 = slice_by_index(begin = var_2876_begin_0, end = var_2876_end_0, end_mask = var_2876_end_mask_0, x = var_2815_cast_fp16)[name = tensor("op_2876_cast_fp16")]; tensor var_2877_begin_0 = const()[name = tensor("op_2877_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2877_end_0 = const()[name = tensor("op_2877_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2877_end_mask_0 = const()[name = tensor("op_2877_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2877_cast_fp16 = slice_by_index(begin = var_2877_begin_0, end = var_2877_end_0, end_mask = var_2877_end_mask_0, x = var_2815_cast_fp16)[name = tensor("op_2877_cast_fp16")]; tensor var_2878_begin_0 = const()[name = tensor("op_2878_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2878_end_0 = const()[name = tensor("op_2878_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2878_end_mask_0 = const()[name = tensor("op_2878_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2878_cast_fp16 = slice_by_index(begin = var_2878_begin_0, end = var_2878_end_0, end_mask = var_2878_end_mask_0, x = var_2815_cast_fp16)[name = tensor("op_2878_cast_fp16")]; tensor var_2879_begin_0 = const()[name = tensor("op_2879_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2879_end_0 = const()[name = tensor("op_2879_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2879_end_mask_0 = const()[name = tensor("op_2879_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2879_cast_fp16 = slice_by_index(begin = var_2879_begin_0, end = var_2879_end_0, end_mask = var_2879_end_mask_0, x = var_2815_cast_fp16)[name = tensor("op_2879_cast_fp16")]; tensor var_2880_begin_0 = const()[name = tensor("op_2880_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2880_end_0 = const()[name = tensor("op_2880_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2880_end_mask_0 = const()[name = tensor("op_2880_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2880_cast_fp16 = slice_by_index(begin = var_2880_begin_0, end = var_2880_end_0, end_mask = var_2880_end_mask_0, x = var_2819_cast_fp16)[name = tensor("op_2880_cast_fp16")]; tensor var_2881_begin_0 = const()[name = tensor("op_2881_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2881_end_0 = const()[name = tensor("op_2881_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2881_end_mask_0 = const()[name = tensor("op_2881_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2881_cast_fp16 = slice_by_index(begin = var_2881_begin_0, end = var_2881_end_0, end_mask = var_2881_end_mask_0, x = var_2819_cast_fp16)[name = tensor("op_2881_cast_fp16")]; tensor var_2882_begin_0 = const()[name = tensor("op_2882_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2882_end_0 = const()[name = tensor("op_2882_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2882_end_mask_0 = const()[name = tensor("op_2882_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2882_cast_fp16 = slice_by_index(begin = var_2882_begin_0, end = var_2882_end_0, end_mask = var_2882_end_mask_0, x = var_2819_cast_fp16)[name = tensor("op_2882_cast_fp16")]; tensor var_2883_begin_0 = const()[name = tensor("op_2883_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2883_end_0 = const()[name = tensor("op_2883_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2883_end_mask_0 = const()[name = tensor("op_2883_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2883_cast_fp16 = slice_by_index(begin = var_2883_begin_0, end = var_2883_end_0, end_mask = var_2883_end_mask_0, x = var_2819_cast_fp16)[name = tensor("op_2883_cast_fp16")]; tensor var_2884_begin_0 = const()[name = tensor("op_2884_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2884_end_0 = const()[name = tensor("op_2884_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2884_end_mask_0 = const()[name = tensor("op_2884_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2884_cast_fp16 = slice_by_index(begin = var_2884_begin_0, end = var_2884_end_0, end_mask = var_2884_end_mask_0, x = var_2819_cast_fp16)[name = tensor("op_2884_cast_fp16")]; tensor var_2885_begin_0 = const()[name = tensor("op_2885_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2885_end_0 = const()[name = tensor("op_2885_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2885_end_mask_0 = const()[name = tensor("op_2885_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2885_cast_fp16 = slice_by_index(begin = var_2885_begin_0, end = var_2885_end_0, end_mask = var_2885_end_mask_0, x = var_2819_cast_fp16)[name = tensor("op_2885_cast_fp16")]; tensor var_2886_begin_0 = const()[name = tensor("op_2886_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2886_end_0 = const()[name = tensor("op_2886_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2886_end_mask_0 = const()[name = tensor("op_2886_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2886_cast_fp16 = slice_by_index(begin = var_2886_begin_0, end = var_2886_end_0, end_mask = var_2886_end_mask_0, x = var_2823_cast_fp16)[name = tensor("op_2886_cast_fp16")]; tensor var_2887_begin_0 = const()[name = tensor("op_2887_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2887_end_0 = const()[name = tensor("op_2887_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2887_end_mask_0 = const()[name = tensor("op_2887_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2887_cast_fp16 = slice_by_index(begin = var_2887_begin_0, end = var_2887_end_0, end_mask = var_2887_end_mask_0, x = var_2823_cast_fp16)[name = tensor("op_2887_cast_fp16")]; tensor var_2888_begin_0 = const()[name = tensor("op_2888_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2888_end_0 = const()[name = tensor("op_2888_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2888_end_mask_0 = const()[name = tensor("op_2888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2888_cast_fp16 = slice_by_index(begin = var_2888_begin_0, end = var_2888_end_0, end_mask = var_2888_end_mask_0, x = var_2823_cast_fp16)[name = tensor("op_2888_cast_fp16")]; tensor var_2889_begin_0 = const()[name = tensor("op_2889_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2889_end_0 = const()[name = tensor("op_2889_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2889_end_mask_0 = const()[name = tensor("op_2889_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2889_cast_fp16 = slice_by_index(begin = var_2889_begin_0, end = var_2889_end_0, end_mask = var_2889_end_mask_0, x = var_2823_cast_fp16)[name = tensor("op_2889_cast_fp16")]; tensor var_2890_begin_0 = const()[name = tensor("op_2890_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2890_end_0 = const()[name = tensor("op_2890_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2890_end_mask_0 = const()[name = tensor("op_2890_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2890_cast_fp16 = slice_by_index(begin = var_2890_begin_0, end = var_2890_end_0, end_mask = var_2890_end_mask_0, x = var_2823_cast_fp16)[name = tensor("op_2890_cast_fp16")]; tensor var_2891_begin_0 = const()[name = tensor("op_2891_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2891_end_0 = const()[name = tensor("op_2891_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2891_end_mask_0 = const()[name = tensor("op_2891_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2891_cast_fp16 = slice_by_index(begin = var_2891_begin_0, end = var_2891_end_0, end_mask = var_2891_end_mask_0, x = var_2823_cast_fp16)[name = tensor("op_2891_cast_fp16")]; tensor var_2892_begin_0 = const()[name = tensor("op_2892_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2892_end_0 = const()[name = tensor("op_2892_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2892_end_mask_0 = const()[name = tensor("op_2892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2892_cast_fp16 = slice_by_index(begin = var_2892_begin_0, end = var_2892_end_0, end_mask = var_2892_end_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2892_cast_fp16")]; tensor var_2893_begin_0 = const()[name = tensor("op_2893_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2893_end_0 = const()[name = tensor("op_2893_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2893_end_mask_0 = const()[name = tensor("op_2893_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2893_cast_fp16 = slice_by_index(begin = var_2893_begin_0, end = var_2893_end_0, end_mask = var_2893_end_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2893_cast_fp16")]; tensor var_2894_begin_0 = const()[name = tensor("op_2894_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2894_end_0 = const()[name = tensor("op_2894_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2894_end_mask_0 = const()[name = tensor("op_2894_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2894_cast_fp16 = slice_by_index(begin = var_2894_begin_0, end = var_2894_end_0, end_mask = var_2894_end_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2894_cast_fp16")]; tensor var_2895_begin_0 = const()[name = tensor("op_2895_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2895_end_0 = const()[name = tensor("op_2895_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2895_end_mask_0 = const()[name = tensor("op_2895_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2895_cast_fp16 = slice_by_index(begin = var_2895_begin_0, end = var_2895_end_0, end_mask = var_2895_end_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2895_cast_fp16")]; tensor var_2896_begin_0 = const()[name = tensor("op_2896_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2896_end_0 = const()[name = tensor("op_2896_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2896_end_mask_0 = const()[name = tensor("op_2896_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2896_cast_fp16 = slice_by_index(begin = var_2896_begin_0, end = var_2896_end_0, end_mask = var_2896_end_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2896_cast_fp16")]; tensor var_2897_begin_0 = const()[name = tensor("op_2897_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2897_end_0 = const()[name = tensor("op_2897_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2897_end_mask_0 = const()[name = tensor("op_2897_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2897_cast_fp16 = slice_by_index(begin = var_2897_begin_0, end = var_2897_end_0, end_mask = var_2897_end_mask_0, x = var_2827_cast_fp16)[name = tensor("op_2897_cast_fp16")]; tensor var_2898_begin_0 = const()[name = tensor("op_2898_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2898_end_0 = const()[name = tensor("op_2898_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2898_end_mask_0 = const()[name = tensor("op_2898_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2898_cast_fp16 = slice_by_index(begin = var_2898_begin_0, end = var_2898_end_0, end_mask = var_2898_end_mask_0, x = var_2831_cast_fp16)[name = tensor("op_2898_cast_fp16")]; tensor var_2899_begin_0 = const()[name = tensor("op_2899_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2899_end_0 = const()[name = tensor("op_2899_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2899_end_mask_0 = const()[name = tensor("op_2899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2899_cast_fp16 = slice_by_index(begin = var_2899_begin_0, end = var_2899_end_0, end_mask = var_2899_end_mask_0, x = var_2831_cast_fp16)[name = tensor("op_2899_cast_fp16")]; tensor var_2900_begin_0 = const()[name = tensor("op_2900_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2900_end_0 = const()[name = tensor("op_2900_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2900_end_mask_0 = const()[name = tensor("op_2900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2900_cast_fp16 = slice_by_index(begin = var_2900_begin_0, end = var_2900_end_0, end_mask = var_2900_end_mask_0, x = var_2831_cast_fp16)[name = tensor("op_2900_cast_fp16")]; tensor var_2901_begin_0 = const()[name = tensor("op_2901_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2901_end_0 = const()[name = tensor("op_2901_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2901_end_mask_0 = const()[name = tensor("op_2901_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2901_cast_fp16 = slice_by_index(begin = var_2901_begin_0, end = var_2901_end_0, end_mask = var_2901_end_mask_0, x = var_2831_cast_fp16)[name = tensor("op_2901_cast_fp16")]; tensor var_2902_begin_0 = const()[name = tensor("op_2902_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2902_end_0 = const()[name = tensor("op_2902_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2902_end_mask_0 = const()[name = tensor("op_2902_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2902_cast_fp16 = slice_by_index(begin = var_2902_begin_0, end = var_2902_end_0, end_mask = var_2902_end_mask_0, x = var_2831_cast_fp16)[name = tensor("op_2902_cast_fp16")]; tensor var_2903_begin_0 = const()[name = tensor("op_2903_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2903_end_0 = const()[name = tensor("op_2903_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2903_end_mask_0 = const()[name = tensor("op_2903_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2903_cast_fp16 = slice_by_index(begin = var_2903_begin_0, end = var_2903_end_0, end_mask = var_2903_end_mask_0, x = var_2831_cast_fp16)[name = tensor("op_2903_cast_fp16")]; tensor var_2904_begin_0 = const()[name = tensor("op_2904_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2904_end_0 = const()[name = tensor("op_2904_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_2904_end_mask_0 = const()[name = tensor("op_2904_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2904_cast_fp16 = slice_by_index(begin = var_2904_begin_0, end = var_2904_end_0, end_mask = var_2904_end_mask_0, x = var_2835_cast_fp16)[name = tensor("op_2904_cast_fp16")]; tensor var_2905_begin_0 = const()[name = tensor("op_2905_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2905_end_0 = const()[name = tensor("op_2905_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_2905_end_mask_0 = const()[name = tensor("op_2905_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2905_cast_fp16 = slice_by_index(begin = var_2905_begin_0, end = var_2905_end_0, end_mask = var_2905_end_mask_0, x = var_2835_cast_fp16)[name = tensor("op_2905_cast_fp16")]; tensor var_2906_begin_0 = const()[name = tensor("op_2906_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2906_end_0 = const()[name = tensor("op_2906_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_2906_end_mask_0 = const()[name = tensor("op_2906_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2906_cast_fp16 = slice_by_index(begin = var_2906_begin_0, end = var_2906_end_0, end_mask = var_2906_end_mask_0, x = var_2835_cast_fp16)[name = tensor("op_2906_cast_fp16")]; tensor var_2907_begin_0 = const()[name = tensor("op_2907_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2907_end_0 = const()[name = tensor("op_2907_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_2907_end_mask_0 = const()[name = tensor("op_2907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2907_cast_fp16 = slice_by_index(begin = var_2907_begin_0, end = var_2907_end_0, end_mask = var_2907_end_mask_0, x = var_2835_cast_fp16)[name = tensor("op_2907_cast_fp16")]; tensor var_2908_begin_0 = const()[name = tensor("op_2908_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2908_end_0 = const()[name = tensor("op_2908_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_2908_end_mask_0 = const()[name = tensor("op_2908_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2908_cast_fp16 = slice_by_index(begin = var_2908_begin_0, end = var_2908_end_0, end_mask = var_2908_end_mask_0, x = var_2835_cast_fp16)[name = tensor("op_2908_cast_fp16")]; tensor var_2909_begin_0 = const()[name = tensor("op_2909_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_2909_end_0 = const()[name = tensor("op_2909_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_2909_end_mask_0 = const()[name = tensor("op_2909_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2909_cast_fp16 = slice_by_index(begin = var_2909_begin_0, end = var_2909_end_0, end_mask = var_2909_end_mask_0, x = var_2835_cast_fp16)[name = tensor("op_2909_cast_fp16")]; tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_2914_begin_0 = const()[name = tensor("op_2914_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2914_end_0 = const()[name = tensor("op_2914_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_2914_end_mask_0 = const()[name = tensor("op_2914_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor("transpose_8")]; tensor var_2914_cast_fp16 = slice_by_index(begin = var_2914_begin_0, end = var_2914_end_0, end_mask = var_2914_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2914_cast_fp16")]; tensor var_2918_begin_0 = const()[name = tensor("op_2918_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_2918_end_0 = const()[name = tensor("op_2918_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_2918_end_mask_0 = const()[name = tensor("op_2918_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2918_cast_fp16 = slice_by_index(begin = var_2918_begin_0, end = var_2918_end_0, end_mask = var_2918_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2918_cast_fp16")]; tensor var_2922_begin_0 = const()[name = tensor("op_2922_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_2922_end_0 = const()[name = tensor("op_2922_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_2922_end_mask_0 = const()[name = tensor("op_2922_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2922_cast_fp16 = slice_by_index(begin = var_2922_begin_0, end = var_2922_end_0, end_mask = var_2922_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2922_cast_fp16")]; tensor var_2926_begin_0 = const()[name = tensor("op_2926_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_2926_end_0 = const()[name = tensor("op_2926_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_2926_end_mask_0 = const()[name = tensor("op_2926_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2926_cast_fp16 = slice_by_index(begin = var_2926_begin_0, end = var_2926_end_0, end_mask = var_2926_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2926_cast_fp16")]; tensor var_2930_begin_0 = const()[name = tensor("op_2930_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2930_end_0 = const()[name = tensor("op_2930_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_2930_end_mask_0 = const()[name = tensor("op_2930_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2930_cast_fp16 = slice_by_index(begin = var_2930_begin_0, end = var_2930_end_0, end_mask = var_2930_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2930_cast_fp16")]; tensor var_2934_begin_0 = const()[name = tensor("op_2934_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_2934_end_0 = const()[name = tensor("op_2934_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_2934_end_mask_0 = const()[name = tensor("op_2934_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2934_cast_fp16 = slice_by_index(begin = var_2934_begin_0, end = var_2934_end_0, end_mask = var_2934_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2934_cast_fp16")]; tensor var_2938_begin_0 = const()[name = tensor("op_2938_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_2938_end_0 = const()[name = tensor("op_2938_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_2938_end_mask_0 = const()[name = tensor("op_2938_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2938_cast_fp16 = slice_by_index(begin = var_2938_begin_0, end = var_2938_end_0, end_mask = var_2938_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2938_cast_fp16")]; tensor var_2942_begin_0 = const()[name = tensor("op_2942_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_2942_end_0 = const()[name = tensor("op_2942_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_2942_end_mask_0 = const()[name = tensor("op_2942_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2942_cast_fp16 = slice_by_index(begin = var_2942_begin_0, end = var_2942_end_0, end_mask = var_2942_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2942_cast_fp16")]; tensor var_2946_begin_0 = const()[name = tensor("op_2946_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2946_end_0 = const()[name = tensor("op_2946_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_2946_end_mask_0 = const()[name = tensor("op_2946_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2946_cast_fp16 = slice_by_index(begin = var_2946_begin_0, end = var_2946_end_0, end_mask = var_2946_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2946_cast_fp16")]; tensor var_2950_begin_0 = const()[name = tensor("op_2950_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_2950_end_0 = const()[name = tensor("op_2950_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_2950_end_mask_0 = const()[name = tensor("op_2950_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2950_cast_fp16 = slice_by_index(begin = var_2950_begin_0, end = var_2950_end_0, end_mask = var_2950_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2950_cast_fp16")]; tensor var_2954_begin_0 = const()[name = tensor("op_2954_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_2954_end_0 = const()[name = tensor("op_2954_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_2954_end_mask_0 = const()[name = tensor("op_2954_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2954_cast_fp16 = slice_by_index(begin = var_2954_begin_0, end = var_2954_end_0, end_mask = var_2954_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2954_cast_fp16")]; tensor var_2958_begin_0 = const()[name = tensor("op_2958_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_2958_end_0 = const()[name = tensor("op_2958_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_2958_end_mask_0 = const()[name = tensor("op_2958_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_2958_cast_fp16 = slice_by_index(begin = var_2958_begin_0, end = var_2958_end_0, end_mask = var_2958_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_2958_cast_fp16")]; tensor var_2960_begin_0 = const()[name = tensor("op_2960_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2960_end_0 = const()[name = tensor("op_2960_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2960_end_mask_0 = const()[name = tensor("op_2960_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2960_cast_fp16 = slice_by_index(begin = var_2960_begin_0, end = var_2960_end_0, end_mask = var_2960_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2960_cast_fp16")]; tensor var_2964_begin_0 = const()[name = tensor("op_2964_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2964_end_0 = const()[name = tensor("op_2964_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2964_end_mask_0 = const()[name = tensor("op_2964_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2964_cast_fp16 = slice_by_index(begin = var_2964_begin_0, end = var_2964_end_0, end_mask = var_2964_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2964_cast_fp16")]; tensor var_2968_begin_0 = const()[name = tensor("op_2968_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2968_end_0 = const()[name = tensor("op_2968_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2968_end_mask_0 = const()[name = tensor("op_2968_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2968_cast_fp16 = slice_by_index(begin = var_2968_begin_0, end = var_2968_end_0, end_mask = var_2968_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2968_cast_fp16")]; tensor var_2972_begin_0 = const()[name = tensor("op_2972_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2972_end_0 = const()[name = tensor("op_2972_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2972_end_mask_0 = const()[name = tensor("op_2972_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2972_cast_fp16 = slice_by_index(begin = var_2972_begin_0, end = var_2972_end_0, end_mask = var_2972_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2972_cast_fp16")]; tensor var_2976_begin_0 = const()[name = tensor("op_2976_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2976_end_0 = const()[name = tensor("op_2976_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2976_end_mask_0 = const()[name = tensor("op_2976_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2976_cast_fp16 = slice_by_index(begin = var_2976_begin_0, end = var_2976_end_0, end_mask = var_2976_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2976_cast_fp16")]; tensor var_2980_begin_0 = const()[name = tensor("op_2980_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2980_end_0 = const()[name = tensor("op_2980_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2980_end_mask_0 = const()[name = tensor("op_2980_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2980_cast_fp16 = slice_by_index(begin = var_2980_begin_0, end = var_2980_end_0, end_mask = var_2980_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2980_cast_fp16")]; tensor var_2984_begin_0 = const()[name = tensor("op_2984_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2984_end_0 = const()[name = tensor("op_2984_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2984_end_mask_0 = const()[name = tensor("op_2984_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2984_cast_fp16 = slice_by_index(begin = var_2984_begin_0, end = var_2984_end_0, end_mask = var_2984_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2984_cast_fp16")]; tensor var_2988_begin_0 = const()[name = tensor("op_2988_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2988_end_0 = const()[name = tensor("op_2988_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_2988_end_mask_0 = const()[name = tensor("op_2988_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2988_cast_fp16 = slice_by_index(begin = var_2988_begin_0, end = var_2988_end_0, end_mask = var_2988_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2988_cast_fp16")]; tensor var_2992_begin_0 = const()[name = tensor("op_2992_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_2992_end_0 = const()[name = tensor("op_2992_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_2992_end_mask_0 = const()[name = tensor("op_2992_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2992_cast_fp16 = slice_by_index(begin = var_2992_begin_0, end = var_2992_end_0, end_mask = var_2992_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2992_cast_fp16")]; tensor var_2996_begin_0 = const()[name = tensor("op_2996_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_2996_end_0 = const()[name = tensor("op_2996_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_2996_end_mask_0 = const()[name = tensor("op_2996_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2996_cast_fp16 = slice_by_index(begin = var_2996_begin_0, end = var_2996_end_0, end_mask = var_2996_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_2996_cast_fp16")]; tensor var_3000_begin_0 = const()[name = tensor("op_3000_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_3000_end_0 = const()[name = tensor("op_3000_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_3000_end_mask_0 = const()[name = tensor("op_3000_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3000_cast_fp16 = slice_by_index(begin = var_3000_begin_0, end = var_3000_end_0, end_mask = var_3000_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3000_cast_fp16")]; tensor var_3004_begin_0 = const()[name = tensor("op_3004_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_3004_end_0 = const()[name = tensor("op_3004_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_3004_end_mask_0 = const()[name = tensor("op_3004_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3004_cast_fp16 = slice_by_index(begin = var_3004_begin_0, end = var_3004_end_0, end_mask = var_3004_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_3004_cast_fp16")]; tensor _SplitHeadsQ__mh_w_433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_433_equation_0, values = (var_2914_cast_fp16, var_2838_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_435_equation_0, values = (var_2914_cast_fp16, var_2839_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_437_equation_0, values = (var_2914_cast_fp16, var_2840_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_439_equation_0, values = (var_2914_cast_fp16, var_2841_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_439_cast_fp16")]; tensor _SplitHeadsQ__mh_w_441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_441_equation_0, values = (var_2914_cast_fp16, var_2842_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_443_equation_0, values = (var_2914_cast_fp16, var_2843_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_445_equation_0, values = (var_2918_cast_fp16, var_2844_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_447_equation_0, values = (var_2918_cast_fp16, var_2845_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_449_equation_0, values = (var_2918_cast_fp16, var_2846_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_451_equation_0, values = (var_2918_cast_fp16, var_2847_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_453_equation_0, values = (var_2918_cast_fp16, var_2848_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_455_equation_0, values = (var_2918_cast_fp16, var_2849_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_457_equation_0, values = (var_2922_cast_fp16, var_2850_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_459_equation_0, values = (var_2922_cast_fp16, var_2851_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_461_equation_0, values = (var_2922_cast_fp16, var_2852_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_463_equation_0, values = (var_2922_cast_fp16, var_2853_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_465_equation_0, values = (var_2922_cast_fp16, var_2854_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_467_equation_0, values = (var_2922_cast_fp16, var_2855_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_469_equation_0, values = (var_2926_cast_fp16, var_2856_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_471_equation_0, values = (var_2926_cast_fp16, var_2857_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_473_equation_0, values = (var_2926_cast_fp16, var_2858_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_475_equation_0, values = (var_2926_cast_fp16, var_2859_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_477_equation_0, values = (var_2926_cast_fp16, var_2860_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_479_equation_0, values = (var_2926_cast_fp16, var_2861_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_479_cast_fp16")]; tensor _SplitHeadsQ__mh_w_481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_481_equation_0, values = (var_2930_cast_fp16, var_2862_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_483_equation_0, values = (var_2930_cast_fp16, var_2863_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_485_equation_0, values = (var_2930_cast_fp16, var_2864_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_487_equation_0, values = (var_2930_cast_fp16, var_2865_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_489_equation_0, values = (var_2930_cast_fp16, var_2866_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_491_equation_0, values = (var_2930_cast_fp16, var_2867_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_493_equation_0, values = (var_2934_cast_fp16, var_2868_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_495_equation_0, values = (var_2934_cast_fp16, var_2869_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_497_equation_0, values = (var_2934_cast_fp16, var_2870_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_499_equation_0, values = (var_2934_cast_fp16, var_2871_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_501_equation_0, values = (var_2934_cast_fp16, var_2872_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_503_equation_0, values = (var_2934_cast_fp16, var_2873_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_505_equation_0, values = (var_2938_cast_fp16, var_2874_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_507_equation_0, values = (var_2938_cast_fp16, var_2875_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_509_equation_0, values = (var_2938_cast_fp16, var_2876_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_511_equation_0, values = (var_2938_cast_fp16, var_2877_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_513_equation_0, values = (var_2938_cast_fp16, var_2878_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_515_equation_0, values = (var_2938_cast_fp16, var_2879_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_517_equation_0, values = (var_2942_cast_fp16, var_2880_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_519_equation_0, values = (var_2942_cast_fp16, var_2881_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_521_equation_0, values = (var_2942_cast_fp16, var_2882_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_523_equation_0, values = (var_2942_cast_fp16, var_2883_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_525_equation_0, values = (var_2942_cast_fp16, var_2884_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_527_equation_0, values = (var_2942_cast_fp16, var_2885_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_529_equation_0, values = (var_2946_cast_fp16, var_2886_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_531_equation_0, values = (var_2946_cast_fp16, var_2887_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_533_equation_0, values = (var_2946_cast_fp16, var_2888_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_535_equation_0, values = (var_2946_cast_fp16, var_2889_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_537_equation_0, values = (var_2946_cast_fp16, var_2890_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_539_equation_0, values = (var_2946_cast_fp16, var_2891_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_541_equation_0, values = (var_2950_cast_fp16, var_2892_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_543_equation_0, values = (var_2950_cast_fp16, var_2893_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_545_equation_0, values = (var_2950_cast_fp16, var_2894_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_547_equation_0, values = (var_2950_cast_fp16, var_2895_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_549_equation_0, values = (var_2950_cast_fp16, var_2896_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_551_equation_0, values = (var_2950_cast_fp16, var_2897_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_553_equation_0, values = (var_2954_cast_fp16, var_2898_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_555_equation_0, values = (var_2954_cast_fp16, var_2899_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_557_equation_0, values = (var_2954_cast_fp16, var_2900_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_559_equation_0, values = (var_2954_cast_fp16, var_2901_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_561_equation_0, values = (var_2954_cast_fp16, var_2902_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_563_equation_0, values = (var_2954_cast_fp16, var_2903_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_565_equation_0, values = (var_2958_cast_fp16, var_2904_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_567_equation_0, values = (var_2958_cast_fp16, var_2905_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_569_equation_0, values = (var_2958_cast_fp16, var_2906_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_571_equation_0, values = (var_2958_cast_fp16, var_2907_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_573_equation_0, values = (var_2958_cast_fp16, var_2908_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_575_equation_0, values = (var_2958_cast_fp16, var_2909_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_575_cast_fp16")]; tensor var_3151_to_fp16 = const()[name = tensor("op_3151_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_433_cast_fp16, y = var_3151_to_fp16)[name = tensor("aw_chunk_433_cast_fp16")]; tensor var_3153_to_fp16 = const()[name = tensor("op_3153_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_435_cast_fp16, y = var_3153_to_fp16)[name = tensor("aw_chunk_435_cast_fp16")]; tensor var_3155_to_fp16 = const()[name = tensor("op_3155_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_437_cast_fp16, y = var_3155_to_fp16)[name = tensor("aw_chunk_437_cast_fp16")]; tensor var_3157_to_fp16 = const()[name = tensor("op_3157_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_439_cast_fp16, y = var_3157_to_fp16)[name = tensor("aw_chunk_439_cast_fp16")]; tensor var_3159_to_fp16 = const()[name = tensor("op_3159_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_441_cast_fp16, y = var_3159_to_fp16)[name = tensor("aw_chunk_441_cast_fp16")]; tensor var_3161_to_fp16 = const()[name = tensor("op_3161_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_443_cast_fp16, y = var_3161_to_fp16)[name = tensor("aw_chunk_443_cast_fp16")]; tensor var_3163_to_fp16 = const()[name = tensor("op_3163_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_445_cast_fp16, y = var_3163_to_fp16)[name = tensor("aw_chunk_445_cast_fp16")]; tensor var_3165_to_fp16 = const()[name = tensor("op_3165_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_447_cast_fp16, y = var_3165_to_fp16)[name = tensor("aw_chunk_447_cast_fp16")]; tensor var_3167_to_fp16 = const()[name = tensor("op_3167_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_449_cast_fp16, y = var_3167_to_fp16)[name = tensor("aw_chunk_449_cast_fp16")]; tensor var_3169_to_fp16 = const()[name = tensor("op_3169_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_451_cast_fp16, y = var_3169_to_fp16)[name = tensor("aw_chunk_451_cast_fp16")]; tensor var_3171_to_fp16 = const()[name = tensor("op_3171_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_453_cast_fp16, y = var_3171_to_fp16)[name = tensor("aw_chunk_453_cast_fp16")]; tensor var_3173_to_fp16 = const()[name = tensor("op_3173_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_455_cast_fp16, y = var_3173_to_fp16)[name = tensor("aw_chunk_455_cast_fp16")]; tensor var_3175_to_fp16 = const()[name = tensor("op_3175_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_457_cast_fp16, y = var_3175_to_fp16)[name = tensor("aw_chunk_457_cast_fp16")]; tensor var_3177_to_fp16 = const()[name = tensor("op_3177_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_459_cast_fp16, y = var_3177_to_fp16)[name = tensor("aw_chunk_459_cast_fp16")]; tensor var_3179_to_fp16 = const()[name = tensor("op_3179_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_461_cast_fp16, y = var_3179_to_fp16)[name = tensor("aw_chunk_461_cast_fp16")]; tensor var_3181_to_fp16 = const()[name = tensor("op_3181_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_463_cast_fp16, y = var_3181_to_fp16)[name = tensor("aw_chunk_463_cast_fp16")]; tensor var_3183_to_fp16 = const()[name = tensor("op_3183_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_465_cast_fp16, y = var_3183_to_fp16)[name = tensor("aw_chunk_465_cast_fp16")]; tensor var_3185_to_fp16 = const()[name = tensor("op_3185_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_467_cast_fp16, y = var_3185_to_fp16)[name = tensor("aw_chunk_467_cast_fp16")]; tensor var_3187_to_fp16 = const()[name = tensor("op_3187_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_469_cast_fp16, y = var_3187_to_fp16)[name = tensor("aw_chunk_469_cast_fp16")]; tensor var_3189_to_fp16 = const()[name = tensor("op_3189_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_471_cast_fp16, y = var_3189_to_fp16)[name = tensor("aw_chunk_471_cast_fp16")]; tensor var_3191_to_fp16 = const()[name = tensor("op_3191_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_473_cast_fp16, y = var_3191_to_fp16)[name = tensor("aw_chunk_473_cast_fp16")]; tensor var_3193_to_fp16 = const()[name = tensor("op_3193_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_475_cast_fp16, y = var_3193_to_fp16)[name = tensor("aw_chunk_475_cast_fp16")]; tensor var_3195_to_fp16 = const()[name = tensor("op_3195_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_477_cast_fp16, y = var_3195_to_fp16)[name = tensor("aw_chunk_477_cast_fp16")]; tensor var_3197_to_fp16 = const()[name = tensor("op_3197_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_479_cast_fp16, y = var_3197_to_fp16)[name = tensor("aw_chunk_479_cast_fp16")]; tensor var_3199_to_fp16 = const()[name = tensor("op_3199_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_481_cast_fp16, y = var_3199_to_fp16)[name = tensor("aw_chunk_481_cast_fp16")]; tensor var_3201_to_fp16 = const()[name = tensor("op_3201_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_483_cast_fp16, y = var_3201_to_fp16)[name = tensor("aw_chunk_483_cast_fp16")]; tensor var_3203_to_fp16 = const()[name = tensor("op_3203_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_485_cast_fp16, y = var_3203_to_fp16)[name = tensor("aw_chunk_485_cast_fp16")]; tensor var_3205_to_fp16 = const()[name = tensor("op_3205_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_487_cast_fp16, y = var_3205_to_fp16)[name = tensor("aw_chunk_487_cast_fp16")]; tensor var_3207_to_fp16 = const()[name = tensor("op_3207_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_489_cast_fp16, y = var_3207_to_fp16)[name = tensor("aw_chunk_489_cast_fp16")]; tensor var_3209_to_fp16 = const()[name = tensor("op_3209_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_491_cast_fp16, y = var_3209_to_fp16)[name = tensor("aw_chunk_491_cast_fp16")]; tensor var_3211_to_fp16 = const()[name = tensor("op_3211_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_493_cast_fp16, y = var_3211_to_fp16)[name = tensor("aw_chunk_493_cast_fp16")]; tensor var_3213_to_fp16 = const()[name = tensor("op_3213_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_495_cast_fp16, y = var_3213_to_fp16)[name = tensor("aw_chunk_495_cast_fp16")]; tensor var_3215_to_fp16 = const()[name = tensor("op_3215_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_497_cast_fp16, y = var_3215_to_fp16)[name = tensor("aw_chunk_497_cast_fp16")]; tensor var_3217_to_fp16 = const()[name = tensor("op_3217_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_499_cast_fp16, y = var_3217_to_fp16)[name = tensor("aw_chunk_499_cast_fp16")]; tensor var_3219_to_fp16 = const()[name = tensor("op_3219_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_501_cast_fp16, y = var_3219_to_fp16)[name = tensor("aw_chunk_501_cast_fp16")]; tensor var_3221_to_fp16 = const()[name = tensor("op_3221_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_503_cast_fp16, y = var_3221_to_fp16)[name = tensor("aw_chunk_503_cast_fp16")]; tensor var_3223_to_fp16 = const()[name = tensor("op_3223_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_505_cast_fp16, y = var_3223_to_fp16)[name = tensor("aw_chunk_505_cast_fp16")]; tensor var_3225_to_fp16 = const()[name = tensor("op_3225_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_507_cast_fp16, y = var_3225_to_fp16)[name = tensor("aw_chunk_507_cast_fp16")]; tensor var_3227_to_fp16 = const()[name = tensor("op_3227_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_509_cast_fp16, y = var_3227_to_fp16)[name = tensor("aw_chunk_509_cast_fp16")]; tensor var_3229_to_fp16 = const()[name = tensor("op_3229_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_511_cast_fp16, y = var_3229_to_fp16)[name = tensor("aw_chunk_511_cast_fp16")]; tensor var_3231_to_fp16 = const()[name = tensor("op_3231_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_513_cast_fp16, y = var_3231_to_fp16)[name = tensor("aw_chunk_513_cast_fp16")]; tensor var_3233_to_fp16 = const()[name = tensor("op_3233_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_515_cast_fp16, y = var_3233_to_fp16)[name = tensor("aw_chunk_515_cast_fp16")]; tensor var_3235_to_fp16 = const()[name = tensor("op_3235_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_517_cast_fp16, y = var_3235_to_fp16)[name = tensor("aw_chunk_517_cast_fp16")]; tensor var_3237_to_fp16 = const()[name = tensor("op_3237_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_519_cast_fp16, y = var_3237_to_fp16)[name = tensor("aw_chunk_519_cast_fp16")]; tensor var_3239_to_fp16 = const()[name = tensor("op_3239_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_521_cast_fp16, y = var_3239_to_fp16)[name = tensor("aw_chunk_521_cast_fp16")]; tensor var_3241_to_fp16 = const()[name = tensor("op_3241_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_523_cast_fp16, y = var_3241_to_fp16)[name = tensor("aw_chunk_523_cast_fp16")]; tensor var_3243_to_fp16 = const()[name = tensor("op_3243_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_525_cast_fp16, y = var_3243_to_fp16)[name = tensor("aw_chunk_525_cast_fp16")]; tensor var_3245_to_fp16 = const()[name = tensor("op_3245_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_527_cast_fp16, y = var_3245_to_fp16)[name = tensor("aw_chunk_527_cast_fp16")]; tensor var_3247_to_fp16 = const()[name = tensor("op_3247_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_529_cast_fp16, y = var_3247_to_fp16)[name = tensor("aw_chunk_529_cast_fp16")]; tensor var_3249_to_fp16 = const()[name = tensor("op_3249_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_531_cast_fp16, y = var_3249_to_fp16)[name = tensor("aw_chunk_531_cast_fp16")]; tensor var_3251_to_fp16 = const()[name = tensor("op_3251_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_533_cast_fp16, y = var_3251_to_fp16)[name = tensor("aw_chunk_533_cast_fp16")]; tensor var_3253_to_fp16 = const()[name = tensor("op_3253_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_535_cast_fp16, y = var_3253_to_fp16)[name = tensor("aw_chunk_535_cast_fp16")]; tensor var_3255_to_fp16 = const()[name = tensor("op_3255_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_537_cast_fp16, y = var_3255_to_fp16)[name = tensor("aw_chunk_537_cast_fp16")]; tensor var_3257_to_fp16 = const()[name = tensor("op_3257_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_539_cast_fp16, y = var_3257_to_fp16)[name = tensor("aw_chunk_539_cast_fp16")]; tensor var_3259_to_fp16 = const()[name = tensor("op_3259_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_541_cast_fp16, y = var_3259_to_fp16)[name = tensor("aw_chunk_541_cast_fp16")]; tensor var_3261_to_fp16 = const()[name = tensor("op_3261_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_543_cast_fp16, y = var_3261_to_fp16)[name = tensor("aw_chunk_543_cast_fp16")]; tensor var_3263_to_fp16 = const()[name = tensor("op_3263_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_545_cast_fp16, y = var_3263_to_fp16)[name = tensor("aw_chunk_545_cast_fp16")]; tensor var_3265_to_fp16 = const()[name = tensor("op_3265_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_547_cast_fp16, y = var_3265_to_fp16)[name = tensor("aw_chunk_547_cast_fp16")]; tensor var_3267_to_fp16 = const()[name = tensor("op_3267_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_549_cast_fp16, y = var_3267_to_fp16)[name = tensor("aw_chunk_549_cast_fp16")]; tensor var_3269_to_fp16 = const()[name = tensor("op_3269_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_551_cast_fp16, y = var_3269_to_fp16)[name = tensor("aw_chunk_551_cast_fp16")]; tensor var_3271_to_fp16 = const()[name = tensor("op_3271_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_553_cast_fp16, y = var_3271_to_fp16)[name = tensor("aw_chunk_553_cast_fp16")]; tensor var_3273_to_fp16 = const()[name = tensor("op_3273_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_555_cast_fp16, y = var_3273_to_fp16)[name = tensor("aw_chunk_555_cast_fp16")]; tensor var_3275_to_fp16 = const()[name = tensor("op_3275_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_557_cast_fp16, y = var_3275_to_fp16)[name = tensor("aw_chunk_557_cast_fp16")]; tensor var_3277_to_fp16 = const()[name = tensor("op_3277_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_559_cast_fp16, y = var_3277_to_fp16)[name = tensor("aw_chunk_559_cast_fp16")]; tensor var_3279_to_fp16 = const()[name = tensor("op_3279_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_561_cast_fp16, y = var_3279_to_fp16)[name = tensor("aw_chunk_561_cast_fp16")]; tensor var_3281_to_fp16 = const()[name = tensor("op_3281_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_563_cast_fp16, y = var_3281_to_fp16)[name = tensor("aw_chunk_563_cast_fp16")]; tensor var_3283_to_fp16 = const()[name = tensor("op_3283_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_565_cast_fp16, y = var_3283_to_fp16)[name = tensor("aw_chunk_565_cast_fp16")]; tensor var_3285_to_fp16 = const()[name = tensor("op_3285_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_567_cast_fp16, y = var_3285_to_fp16)[name = tensor("aw_chunk_567_cast_fp16")]; tensor var_3287_to_fp16 = const()[name = tensor("op_3287_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_569_cast_fp16, y = var_3287_to_fp16)[name = tensor("aw_chunk_569_cast_fp16")]; tensor var_3289_to_fp16 = const()[name = tensor("op_3289_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_571_cast_fp16, y = var_3289_to_fp16)[name = tensor("aw_chunk_571_cast_fp16")]; tensor var_3291_to_fp16 = const()[name = tensor("op_3291_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_573_cast_fp16, y = var_3291_to_fp16)[name = tensor("aw_chunk_573_cast_fp16")]; tensor var_3293_to_fp16 = const()[name = tensor("op_3293_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_575_cast_fp16, y = var_3293_to_fp16)[name = tensor("aw_chunk_575_cast_fp16")]; tensor var_3295_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_433_cast_fp16)[name = tensor("op_3295_cast_fp16")]; tensor var_3296_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_435_cast_fp16)[name = tensor("op_3296_cast_fp16")]; tensor var_3297_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_437_cast_fp16)[name = tensor("op_3297_cast_fp16")]; tensor var_3298_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_439_cast_fp16)[name = tensor("op_3298_cast_fp16")]; tensor var_3299_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_441_cast_fp16)[name = tensor("op_3299_cast_fp16")]; tensor var_3300_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_443_cast_fp16)[name = tensor("op_3300_cast_fp16")]; tensor var_3301_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_445_cast_fp16)[name = tensor("op_3301_cast_fp16")]; tensor var_3302_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_447_cast_fp16)[name = tensor("op_3302_cast_fp16")]; tensor var_3303_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_449_cast_fp16)[name = tensor("op_3303_cast_fp16")]; tensor var_3304_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_451_cast_fp16)[name = tensor("op_3304_cast_fp16")]; tensor var_3305_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_453_cast_fp16)[name = tensor("op_3305_cast_fp16")]; tensor var_3306_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_455_cast_fp16)[name = tensor("op_3306_cast_fp16")]; tensor var_3307_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_457_cast_fp16)[name = tensor("op_3307_cast_fp16")]; tensor var_3308_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_459_cast_fp16)[name = tensor("op_3308_cast_fp16")]; tensor var_3309_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_461_cast_fp16)[name = tensor("op_3309_cast_fp16")]; tensor var_3310_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_463_cast_fp16)[name = tensor("op_3310_cast_fp16")]; tensor var_3311_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_465_cast_fp16)[name = tensor("op_3311_cast_fp16")]; tensor var_3312_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_467_cast_fp16)[name = tensor("op_3312_cast_fp16")]; tensor var_3313_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_469_cast_fp16)[name = tensor("op_3313_cast_fp16")]; tensor var_3314_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_471_cast_fp16)[name = tensor("op_3314_cast_fp16")]; tensor var_3315_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_473_cast_fp16)[name = tensor("op_3315_cast_fp16")]; tensor var_3316_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_475_cast_fp16)[name = tensor("op_3316_cast_fp16")]; tensor var_3317_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_477_cast_fp16)[name = tensor("op_3317_cast_fp16")]; tensor var_3318_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_479_cast_fp16)[name = tensor("op_3318_cast_fp16")]; tensor var_3319_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_481_cast_fp16)[name = tensor("op_3319_cast_fp16")]; tensor var_3320_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_483_cast_fp16)[name = tensor("op_3320_cast_fp16")]; tensor var_3321_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_485_cast_fp16)[name = tensor("op_3321_cast_fp16")]; tensor var_3322_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_487_cast_fp16)[name = tensor("op_3322_cast_fp16")]; tensor var_3323_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_489_cast_fp16)[name = tensor("op_3323_cast_fp16")]; tensor var_3324_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_491_cast_fp16)[name = tensor("op_3324_cast_fp16")]; tensor var_3325_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_493_cast_fp16)[name = tensor("op_3325_cast_fp16")]; tensor var_3326_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_495_cast_fp16)[name = tensor("op_3326_cast_fp16")]; tensor var_3327_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_497_cast_fp16)[name = tensor("op_3327_cast_fp16")]; tensor var_3328_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_499_cast_fp16)[name = tensor("op_3328_cast_fp16")]; tensor var_3329_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_501_cast_fp16)[name = tensor("op_3329_cast_fp16")]; tensor var_3330_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_503_cast_fp16)[name = tensor("op_3330_cast_fp16")]; tensor var_3331_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_505_cast_fp16)[name = tensor("op_3331_cast_fp16")]; tensor var_3332_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_507_cast_fp16)[name = tensor("op_3332_cast_fp16")]; tensor var_3333_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_509_cast_fp16)[name = tensor("op_3333_cast_fp16")]; tensor var_3334_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_511_cast_fp16)[name = tensor("op_3334_cast_fp16")]; tensor var_3335_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_513_cast_fp16)[name = tensor("op_3335_cast_fp16")]; tensor var_3336_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_515_cast_fp16)[name = tensor("op_3336_cast_fp16")]; tensor var_3337_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_517_cast_fp16)[name = tensor("op_3337_cast_fp16")]; tensor var_3338_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_519_cast_fp16)[name = tensor("op_3338_cast_fp16")]; tensor var_3339_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_521_cast_fp16)[name = tensor("op_3339_cast_fp16")]; tensor var_3340_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_523_cast_fp16)[name = tensor("op_3340_cast_fp16")]; tensor var_3341_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_525_cast_fp16)[name = tensor("op_3341_cast_fp16")]; tensor var_3342_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_527_cast_fp16)[name = tensor("op_3342_cast_fp16")]; tensor var_3343_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_529_cast_fp16)[name = tensor("op_3343_cast_fp16")]; tensor var_3344_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_531_cast_fp16)[name = tensor("op_3344_cast_fp16")]; tensor var_3345_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_533_cast_fp16)[name = tensor("op_3345_cast_fp16")]; tensor var_3346_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_535_cast_fp16)[name = tensor("op_3346_cast_fp16")]; tensor var_3347_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_537_cast_fp16)[name = tensor("op_3347_cast_fp16")]; tensor var_3348_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_539_cast_fp16)[name = tensor("op_3348_cast_fp16")]; tensor var_3349_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_541_cast_fp16)[name = tensor("op_3349_cast_fp16")]; tensor var_3350_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_543_cast_fp16)[name = tensor("op_3350_cast_fp16")]; tensor var_3351_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_545_cast_fp16)[name = tensor("op_3351_cast_fp16")]; tensor var_3352_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_547_cast_fp16)[name = tensor("op_3352_cast_fp16")]; tensor var_3353_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_549_cast_fp16)[name = tensor("op_3353_cast_fp16")]; tensor var_3354_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_551_cast_fp16)[name = tensor("op_3354_cast_fp16")]; tensor var_3355_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_553_cast_fp16)[name = tensor("op_3355_cast_fp16")]; tensor var_3356_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_555_cast_fp16)[name = tensor("op_3356_cast_fp16")]; tensor var_3357_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_557_cast_fp16)[name = tensor("op_3357_cast_fp16")]; tensor var_3358_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_559_cast_fp16)[name = tensor("op_3358_cast_fp16")]; tensor var_3359_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_561_cast_fp16)[name = tensor("op_3359_cast_fp16")]; tensor var_3360_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_563_cast_fp16)[name = tensor("op_3360_cast_fp16")]; tensor var_3361_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_565_cast_fp16)[name = tensor("op_3361_cast_fp16")]; tensor var_3362_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_567_cast_fp16)[name = tensor("op_3362_cast_fp16")]; tensor var_3363_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_569_cast_fp16)[name = tensor("op_3363_cast_fp16")]; tensor var_3364_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_571_cast_fp16)[name = tensor("op_3364_cast_fp16")]; tensor var_3365_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_573_cast_fp16)[name = tensor("op_3365_cast_fp16")]; tensor var_3366_cast_fp16 = softmax(axis = var_2739, x = aw_chunk_575_cast_fp16)[name = tensor("op_3366_cast_fp16")]; tensor var_3368_equation_0 = const()[name = tensor("op_3368_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3368_cast_fp16 = einsum(equation = var_3368_equation_0, values = (var_2960_cast_fp16, var_3295_cast_fp16))[name = tensor("op_3368_cast_fp16")]; tensor var_3370_equation_0 = const()[name = tensor("op_3370_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3370_cast_fp16 = einsum(equation = var_3370_equation_0, values = (var_2960_cast_fp16, var_3296_cast_fp16))[name = tensor("op_3370_cast_fp16")]; tensor var_3372_equation_0 = const()[name = tensor("op_3372_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3372_cast_fp16 = einsum(equation = var_3372_equation_0, values = (var_2960_cast_fp16, var_3297_cast_fp16))[name = tensor("op_3372_cast_fp16")]; tensor var_3374_equation_0 = const()[name = tensor("op_3374_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3374_cast_fp16 = einsum(equation = var_3374_equation_0, values = (var_2960_cast_fp16, var_3298_cast_fp16))[name = tensor("op_3374_cast_fp16")]; tensor var_3376_equation_0 = const()[name = tensor("op_3376_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3376_cast_fp16 = einsum(equation = var_3376_equation_0, values = (var_2960_cast_fp16, var_3299_cast_fp16))[name = tensor("op_3376_cast_fp16")]; tensor var_3378_equation_0 = const()[name = tensor("op_3378_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3378_cast_fp16 = einsum(equation = var_3378_equation_0, values = (var_2960_cast_fp16, var_3300_cast_fp16))[name = tensor("op_3378_cast_fp16")]; tensor var_3380_equation_0 = const()[name = tensor("op_3380_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3380_cast_fp16 = einsum(equation = var_3380_equation_0, values = (var_2964_cast_fp16, var_3301_cast_fp16))[name = tensor("op_3380_cast_fp16")]; tensor var_3382_equation_0 = const()[name = tensor("op_3382_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3382_cast_fp16 = einsum(equation = var_3382_equation_0, values = (var_2964_cast_fp16, var_3302_cast_fp16))[name = tensor("op_3382_cast_fp16")]; tensor var_3384_equation_0 = const()[name = tensor("op_3384_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3384_cast_fp16 = einsum(equation = var_3384_equation_0, values = (var_2964_cast_fp16, var_3303_cast_fp16))[name = tensor("op_3384_cast_fp16")]; tensor var_3386_equation_0 = const()[name = tensor("op_3386_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3386_cast_fp16 = einsum(equation = var_3386_equation_0, values = (var_2964_cast_fp16, var_3304_cast_fp16))[name = tensor("op_3386_cast_fp16")]; tensor var_3388_equation_0 = const()[name = tensor("op_3388_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3388_cast_fp16 = einsum(equation = var_3388_equation_0, values = (var_2964_cast_fp16, var_3305_cast_fp16))[name = tensor("op_3388_cast_fp16")]; tensor var_3390_equation_0 = const()[name = tensor("op_3390_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3390_cast_fp16 = einsum(equation = var_3390_equation_0, values = (var_2964_cast_fp16, var_3306_cast_fp16))[name = tensor("op_3390_cast_fp16")]; tensor var_3392_equation_0 = const()[name = tensor("op_3392_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3392_cast_fp16 = einsum(equation = var_3392_equation_0, values = (var_2968_cast_fp16, var_3307_cast_fp16))[name = tensor("op_3392_cast_fp16")]; tensor var_3394_equation_0 = const()[name = tensor("op_3394_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3394_cast_fp16 = einsum(equation = var_3394_equation_0, values = (var_2968_cast_fp16, var_3308_cast_fp16))[name = tensor("op_3394_cast_fp16")]; tensor var_3396_equation_0 = const()[name = tensor("op_3396_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3396_cast_fp16 = einsum(equation = var_3396_equation_0, values = (var_2968_cast_fp16, var_3309_cast_fp16))[name = tensor("op_3396_cast_fp16")]; tensor var_3398_equation_0 = const()[name = tensor("op_3398_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3398_cast_fp16 = einsum(equation = var_3398_equation_0, values = (var_2968_cast_fp16, var_3310_cast_fp16))[name = tensor("op_3398_cast_fp16")]; tensor var_3400_equation_0 = const()[name = tensor("op_3400_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3400_cast_fp16 = einsum(equation = var_3400_equation_0, values = (var_2968_cast_fp16, var_3311_cast_fp16))[name = tensor("op_3400_cast_fp16")]; tensor var_3402_equation_0 = const()[name = tensor("op_3402_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3402_cast_fp16 = einsum(equation = var_3402_equation_0, values = (var_2968_cast_fp16, var_3312_cast_fp16))[name = tensor("op_3402_cast_fp16")]; tensor var_3404_equation_0 = const()[name = tensor("op_3404_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3404_cast_fp16 = einsum(equation = var_3404_equation_0, values = (var_2972_cast_fp16, var_3313_cast_fp16))[name = tensor("op_3404_cast_fp16")]; tensor var_3406_equation_0 = const()[name = tensor("op_3406_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3406_cast_fp16 = einsum(equation = var_3406_equation_0, values = (var_2972_cast_fp16, var_3314_cast_fp16))[name = tensor("op_3406_cast_fp16")]; tensor var_3408_equation_0 = const()[name = tensor("op_3408_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3408_cast_fp16 = einsum(equation = var_3408_equation_0, values = (var_2972_cast_fp16, var_3315_cast_fp16))[name = tensor("op_3408_cast_fp16")]; tensor var_3410_equation_0 = const()[name = tensor("op_3410_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3410_cast_fp16 = einsum(equation = var_3410_equation_0, values = (var_2972_cast_fp16, var_3316_cast_fp16))[name = tensor("op_3410_cast_fp16")]; tensor var_3412_equation_0 = const()[name = tensor("op_3412_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3412_cast_fp16 = einsum(equation = var_3412_equation_0, values = (var_2972_cast_fp16, var_3317_cast_fp16))[name = tensor("op_3412_cast_fp16")]; tensor var_3414_equation_0 = const()[name = tensor("op_3414_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3414_cast_fp16 = einsum(equation = var_3414_equation_0, values = (var_2972_cast_fp16, var_3318_cast_fp16))[name = tensor("op_3414_cast_fp16")]; tensor var_3416_equation_0 = const()[name = tensor("op_3416_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3416_cast_fp16 = einsum(equation = var_3416_equation_0, values = (var_2976_cast_fp16, var_3319_cast_fp16))[name = tensor("op_3416_cast_fp16")]; tensor var_3418_equation_0 = const()[name = tensor("op_3418_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3418_cast_fp16 = einsum(equation = var_3418_equation_0, values = (var_2976_cast_fp16, var_3320_cast_fp16))[name = tensor("op_3418_cast_fp16")]; tensor var_3420_equation_0 = const()[name = tensor("op_3420_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3420_cast_fp16 = einsum(equation = var_3420_equation_0, values = (var_2976_cast_fp16, var_3321_cast_fp16))[name = tensor("op_3420_cast_fp16")]; tensor var_3422_equation_0 = const()[name = tensor("op_3422_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3422_cast_fp16 = einsum(equation = var_3422_equation_0, values = (var_2976_cast_fp16, var_3322_cast_fp16))[name = tensor("op_3422_cast_fp16")]; tensor var_3424_equation_0 = const()[name = tensor("op_3424_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3424_cast_fp16 = einsum(equation = var_3424_equation_0, values = (var_2976_cast_fp16, var_3323_cast_fp16))[name = tensor("op_3424_cast_fp16")]; tensor var_3426_equation_0 = const()[name = tensor("op_3426_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3426_cast_fp16 = einsum(equation = var_3426_equation_0, values = (var_2976_cast_fp16, var_3324_cast_fp16))[name = tensor("op_3426_cast_fp16")]; tensor var_3428_equation_0 = const()[name = tensor("op_3428_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3428_cast_fp16 = einsum(equation = var_3428_equation_0, values = (var_2980_cast_fp16, var_3325_cast_fp16))[name = tensor("op_3428_cast_fp16")]; tensor var_3430_equation_0 = const()[name = tensor("op_3430_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3430_cast_fp16 = einsum(equation = var_3430_equation_0, values = (var_2980_cast_fp16, var_3326_cast_fp16))[name = tensor("op_3430_cast_fp16")]; tensor var_3432_equation_0 = const()[name = tensor("op_3432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3432_cast_fp16 = einsum(equation = var_3432_equation_0, values = (var_2980_cast_fp16, var_3327_cast_fp16))[name = tensor("op_3432_cast_fp16")]; tensor var_3434_equation_0 = const()[name = tensor("op_3434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3434_cast_fp16 = einsum(equation = var_3434_equation_0, values = (var_2980_cast_fp16, var_3328_cast_fp16))[name = tensor("op_3434_cast_fp16")]; tensor var_3436_equation_0 = const()[name = tensor("op_3436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3436_cast_fp16 = einsum(equation = var_3436_equation_0, values = (var_2980_cast_fp16, var_3329_cast_fp16))[name = tensor("op_3436_cast_fp16")]; tensor var_3438_equation_0 = const()[name = tensor("op_3438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3438_cast_fp16 = einsum(equation = var_3438_equation_0, values = (var_2980_cast_fp16, var_3330_cast_fp16))[name = tensor("op_3438_cast_fp16")]; tensor var_3440_equation_0 = const()[name = tensor("op_3440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3440_cast_fp16 = einsum(equation = var_3440_equation_0, values = (var_2984_cast_fp16, var_3331_cast_fp16))[name = tensor("op_3440_cast_fp16")]; tensor var_3442_equation_0 = const()[name = tensor("op_3442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3442_cast_fp16 = einsum(equation = var_3442_equation_0, values = (var_2984_cast_fp16, var_3332_cast_fp16))[name = tensor("op_3442_cast_fp16")]; tensor var_3444_equation_0 = const()[name = tensor("op_3444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3444_cast_fp16 = einsum(equation = var_3444_equation_0, values = (var_2984_cast_fp16, var_3333_cast_fp16))[name = tensor("op_3444_cast_fp16")]; tensor var_3446_equation_0 = const()[name = tensor("op_3446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3446_cast_fp16 = einsum(equation = var_3446_equation_0, values = (var_2984_cast_fp16, var_3334_cast_fp16))[name = tensor("op_3446_cast_fp16")]; tensor var_3448_equation_0 = const()[name = tensor("op_3448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3448_cast_fp16 = einsum(equation = var_3448_equation_0, values = (var_2984_cast_fp16, var_3335_cast_fp16))[name = tensor("op_3448_cast_fp16")]; tensor var_3450_equation_0 = const()[name = tensor("op_3450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3450_cast_fp16 = einsum(equation = var_3450_equation_0, values = (var_2984_cast_fp16, var_3336_cast_fp16))[name = tensor("op_3450_cast_fp16")]; tensor var_3452_equation_0 = const()[name = tensor("op_3452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3452_cast_fp16 = einsum(equation = var_3452_equation_0, values = (var_2988_cast_fp16, var_3337_cast_fp16))[name = tensor("op_3452_cast_fp16")]; tensor var_3454_equation_0 = const()[name = tensor("op_3454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3454_cast_fp16 = einsum(equation = var_3454_equation_0, values = (var_2988_cast_fp16, var_3338_cast_fp16))[name = tensor("op_3454_cast_fp16")]; tensor var_3456_equation_0 = const()[name = tensor("op_3456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3456_cast_fp16 = einsum(equation = var_3456_equation_0, values = (var_2988_cast_fp16, var_3339_cast_fp16))[name = tensor("op_3456_cast_fp16")]; tensor var_3458_equation_0 = const()[name = tensor("op_3458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3458_cast_fp16 = einsum(equation = var_3458_equation_0, values = (var_2988_cast_fp16, var_3340_cast_fp16))[name = tensor("op_3458_cast_fp16")]; tensor var_3460_equation_0 = const()[name = tensor("op_3460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3460_cast_fp16 = einsum(equation = var_3460_equation_0, values = (var_2988_cast_fp16, var_3341_cast_fp16))[name = tensor("op_3460_cast_fp16")]; tensor var_3462_equation_0 = const()[name = tensor("op_3462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3462_cast_fp16 = einsum(equation = var_3462_equation_0, values = (var_2988_cast_fp16, var_3342_cast_fp16))[name = tensor("op_3462_cast_fp16")]; tensor var_3464_equation_0 = const()[name = tensor("op_3464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3464_cast_fp16 = einsum(equation = var_3464_equation_0, values = (var_2992_cast_fp16, var_3343_cast_fp16))[name = tensor("op_3464_cast_fp16")]; tensor var_3466_equation_0 = const()[name = tensor("op_3466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3466_cast_fp16 = einsum(equation = var_3466_equation_0, values = (var_2992_cast_fp16, var_3344_cast_fp16))[name = tensor("op_3466_cast_fp16")]; tensor var_3468_equation_0 = const()[name = tensor("op_3468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3468_cast_fp16 = einsum(equation = var_3468_equation_0, values = (var_2992_cast_fp16, var_3345_cast_fp16))[name = tensor("op_3468_cast_fp16")]; tensor var_3470_equation_0 = const()[name = tensor("op_3470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3470_cast_fp16 = einsum(equation = var_3470_equation_0, values = (var_2992_cast_fp16, var_3346_cast_fp16))[name = tensor("op_3470_cast_fp16")]; tensor var_3472_equation_0 = const()[name = tensor("op_3472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3472_cast_fp16 = einsum(equation = var_3472_equation_0, values = (var_2992_cast_fp16, var_3347_cast_fp16))[name = tensor("op_3472_cast_fp16")]; tensor var_3474_equation_0 = const()[name = tensor("op_3474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3474_cast_fp16 = einsum(equation = var_3474_equation_0, values = (var_2992_cast_fp16, var_3348_cast_fp16))[name = tensor("op_3474_cast_fp16")]; tensor var_3476_equation_0 = const()[name = tensor("op_3476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3476_cast_fp16 = einsum(equation = var_3476_equation_0, values = (var_2996_cast_fp16, var_3349_cast_fp16))[name = tensor("op_3476_cast_fp16")]; tensor var_3478_equation_0 = const()[name = tensor("op_3478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3478_cast_fp16 = einsum(equation = var_3478_equation_0, values = (var_2996_cast_fp16, var_3350_cast_fp16))[name = tensor("op_3478_cast_fp16")]; tensor var_3480_equation_0 = const()[name = tensor("op_3480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3480_cast_fp16 = einsum(equation = var_3480_equation_0, values = (var_2996_cast_fp16, var_3351_cast_fp16))[name = tensor("op_3480_cast_fp16")]; tensor var_3482_equation_0 = const()[name = tensor("op_3482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3482_cast_fp16 = einsum(equation = var_3482_equation_0, values = (var_2996_cast_fp16, var_3352_cast_fp16))[name = tensor("op_3482_cast_fp16")]; tensor var_3484_equation_0 = const()[name = tensor("op_3484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3484_cast_fp16 = einsum(equation = var_3484_equation_0, values = (var_2996_cast_fp16, var_3353_cast_fp16))[name = tensor("op_3484_cast_fp16")]; tensor var_3486_equation_0 = const()[name = tensor("op_3486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3486_cast_fp16 = einsum(equation = var_3486_equation_0, values = (var_2996_cast_fp16, var_3354_cast_fp16))[name = tensor("op_3486_cast_fp16")]; tensor var_3488_equation_0 = const()[name = tensor("op_3488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3488_cast_fp16 = einsum(equation = var_3488_equation_0, values = (var_3000_cast_fp16, var_3355_cast_fp16))[name = tensor("op_3488_cast_fp16")]; tensor var_3490_equation_0 = const()[name = tensor("op_3490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3490_cast_fp16 = einsum(equation = var_3490_equation_0, values = (var_3000_cast_fp16, var_3356_cast_fp16))[name = tensor("op_3490_cast_fp16")]; tensor var_3492_equation_0 = const()[name = tensor("op_3492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3492_cast_fp16 = einsum(equation = var_3492_equation_0, values = (var_3000_cast_fp16, var_3357_cast_fp16))[name = tensor("op_3492_cast_fp16")]; tensor var_3494_equation_0 = const()[name = tensor("op_3494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3494_cast_fp16 = einsum(equation = var_3494_equation_0, values = (var_3000_cast_fp16, var_3358_cast_fp16))[name = tensor("op_3494_cast_fp16")]; tensor var_3496_equation_0 = const()[name = tensor("op_3496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3496_cast_fp16 = einsum(equation = var_3496_equation_0, values = (var_3000_cast_fp16, var_3359_cast_fp16))[name = tensor("op_3496_cast_fp16")]; tensor var_3498_equation_0 = const()[name = tensor("op_3498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3498_cast_fp16 = einsum(equation = var_3498_equation_0, values = (var_3000_cast_fp16, var_3360_cast_fp16))[name = tensor("op_3498_cast_fp16")]; tensor var_3500_equation_0 = const()[name = tensor("op_3500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3500_cast_fp16 = einsum(equation = var_3500_equation_0, values = (var_3004_cast_fp16, var_3361_cast_fp16))[name = tensor("op_3500_cast_fp16")]; tensor var_3502_equation_0 = const()[name = tensor("op_3502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3502_cast_fp16 = einsum(equation = var_3502_equation_0, values = (var_3004_cast_fp16, var_3362_cast_fp16))[name = tensor("op_3502_cast_fp16")]; tensor var_3504_equation_0 = const()[name = tensor("op_3504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3504_cast_fp16 = einsum(equation = var_3504_equation_0, values = (var_3004_cast_fp16, var_3363_cast_fp16))[name = tensor("op_3504_cast_fp16")]; tensor var_3506_equation_0 = const()[name = tensor("op_3506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3506_cast_fp16 = einsum(equation = var_3506_equation_0, values = (var_3004_cast_fp16, var_3364_cast_fp16))[name = tensor("op_3506_cast_fp16")]; tensor var_3508_equation_0 = const()[name = tensor("op_3508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3508_cast_fp16 = einsum(equation = var_3508_equation_0, values = (var_3004_cast_fp16, var_3365_cast_fp16))[name = tensor("op_3508_cast_fp16")]; tensor var_3510_equation_0 = const()[name = tensor("op_3510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3510_cast_fp16 = einsum(equation = var_3510_equation_0, values = (var_3004_cast_fp16, var_3366_cast_fp16))[name = tensor("op_3510_cast_fp16")]; tensor var_3512_interleave_0 = const()[name = tensor("op_3512_interleave_0"), val = tensor(false)]; tensor var_3512_cast_fp16 = concat(axis = var_2723, interleave = var_3512_interleave_0, values = (var_3368_cast_fp16, var_3370_cast_fp16, var_3372_cast_fp16, var_3374_cast_fp16, var_3376_cast_fp16, var_3378_cast_fp16))[name = tensor("op_3512_cast_fp16")]; tensor var_3514_interleave_0 = const()[name = tensor("op_3514_interleave_0"), val = tensor(false)]; tensor var_3514_cast_fp16 = concat(axis = var_2723, interleave = var_3514_interleave_0, values = (var_3380_cast_fp16, var_3382_cast_fp16, var_3384_cast_fp16, var_3386_cast_fp16, var_3388_cast_fp16, var_3390_cast_fp16))[name = tensor("op_3514_cast_fp16")]; tensor var_3516_interleave_0 = const()[name = tensor("op_3516_interleave_0"), val = tensor(false)]; tensor var_3516_cast_fp16 = concat(axis = var_2723, interleave = var_3516_interleave_0, values = (var_3392_cast_fp16, var_3394_cast_fp16, var_3396_cast_fp16, var_3398_cast_fp16, var_3400_cast_fp16, var_3402_cast_fp16))[name = tensor("op_3516_cast_fp16")]; tensor var_3518_interleave_0 = const()[name = tensor("op_3518_interleave_0"), val = tensor(false)]; tensor var_3518_cast_fp16 = concat(axis = var_2723, interleave = var_3518_interleave_0, values = (var_3404_cast_fp16, var_3406_cast_fp16, var_3408_cast_fp16, var_3410_cast_fp16, var_3412_cast_fp16, var_3414_cast_fp16))[name = tensor("op_3518_cast_fp16")]; tensor var_3520_interleave_0 = const()[name = tensor("op_3520_interleave_0"), val = tensor(false)]; tensor var_3520_cast_fp16 = concat(axis = var_2723, interleave = var_3520_interleave_0, values = (var_3416_cast_fp16, var_3418_cast_fp16, var_3420_cast_fp16, var_3422_cast_fp16, var_3424_cast_fp16, var_3426_cast_fp16))[name = tensor("op_3520_cast_fp16")]; tensor var_3522_interleave_0 = const()[name = tensor("op_3522_interleave_0"), val = tensor(false)]; tensor var_3522_cast_fp16 = concat(axis = var_2723, interleave = var_3522_interleave_0, values = (var_3428_cast_fp16, var_3430_cast_fp16, var_3432_cast_fp16, var_3434_cast_fp16, var_3436_cast_fp16, var_3438_cast_fp16))[name = tensor("op_3522_cast_fp16")]; tensor var_3524_interleave_0 = const()[name = tensor("op_3524_interleave_0"), val = tensor(false)]; tensor var_3524_cast_fp16 = concat(axis = var_2723, interleave = var_3524_interleave_0, values = (var_3440_cast_fp16, var_3442_cast_fp16, var_3444_cast_fp16, var_3446_cast_fp16, var_3448_cast_fp16, var_3450_cast_fp16))[name = tensor("op_3524_cast_fp16")]; tensor var_3526_interleave_0 = const()[name = tensor("op_3526_interleave_0"), val = tensor(false)]; tensor var_3526_cast_fp16 = concat(axis = var_2723, interleave = var_3526_interleave_0, values = (var_3452_cast_fp16, var_3454_cast_fp16, var_3456_cast_fp16, var_3458_cast_fp16, var_3460_cast_fp16, var_3462_cast_fp16))[name = tensor("op_3526_cast_fp16")]; tensor var_3528_interleave_0 = const()[name = tensor("op_3528_interleave_0"), val = tensor(false)]; tensor var_3528_cast_fp16 = concat(axis = var_2723, interleave = var_3528_interleave_0, values = (var_3464_cast_fp16, var_3466_cast_fp16, var_3468_cast_fp16, var_3470_cast_fp16, var_3472_cast_fp16, var_3474_cast_fp16))[name = tensor("op_3528_cast_fp16")]; tensor var_3530_interleave_0 = const()[name = tensor("op_3530_interleave_0"), val = tensor(false)]; tensor var_3530_cast_fp16 = concat(axis = var_2723, interleave = var_3530_interleave_0, values = (var_3476_cast_fp16, var_3478_cast_fp16, var_3480_cast_fp16, var_3482_cast_fp16, var_3484_cast_fp16, var_3486_cast_fp16))[name = tensor("op_3530_cast_fp16")]; tensor var_3532_interleave_0 = const()[name = tensor("op_3532_interleave_0"), val = tensor(false)]; tensor var_3532_cast_fp16 = concat(axis = var_2723, interleave = var_3532_interleave_0, values = (var_3488_cast_fp16, var_3490_cast_fp16, var_3492_cast_fp16, var_3494_cast_fp16, var_3496_cast_fp16, var_3498_cast_fp16))[name = tensor("op_3532_cast_fp16")]; tensor var_3534_interleave_0 = const()[name = tensor("op_3534_interleave_0"), val = tensor(false)]; tensor var_3534_cast_fp16 = concat(axis = var_2723, interleave = var_3534_interleave_0, values = (var_3500_cast_fp16, var_3502_cast_fp16, var_3504_cast_fp16, var_3506_cast_fp16, var_3508_cast_fp16, var_3510_cast_fp16))[name = tensor("op_3534_cast_fp16")]; tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; tensor input_25_cast_fp16 = concat(axis = var_2739, interleave = input_25_interleave_0, values = (var_3512_cast_fp16, var_3514_cast_fp16, var_3516_cast_fp16, var_3518_cast_fp16, var_3520_cast_fp16, var_3522_cast_fp16, var_3524_cast_fp16, var_3526_cast_fp16, var_3528_cast_fp16, var_3530_cast_fp16, var_3532_cast_fp16, var_3534_cast_fp16))[name = tensor("input_25_cast_fp16")]; tensor obj_15_pad_type_0 = const()[name = tensor("obj_15_pad_type_0"), val = tensor("valid")]; tensor obj_15_strides_0 = const()[name = tensor("obj_15_strides_0"), val = tensor([1, 1])]; tensor obj_15_pad_0 = const()[name = tensor("obj_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_15_dilations_0 = const()[name = tensor("obj_15_dilations_0"), val = tensor([1, 1])]; tensor obj_15_groups_0 = const()[name = tensor("obj_15_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52289280)))]; tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53468992)))]; tensor obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_15_dilations_0, groups = obj_15_groups_0, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = obj_15_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_15_cast_fp16")]; tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; tensor var_3553_to_fp16 = const()[name = tensor("op_3553_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_3553_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53470592)))]; tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53472192)))]; tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("valid")]; tensor input_29_strides_0 = const()[name = tensor("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = tensor("input_29_dilations_0"), val = tensor([1, 1])]; tensor input_29_groups_0 = const()[name = tensor("input_29_groups_0"), val = tensor(1)]; tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53473792)))]; tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58192448)))]; tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("valid")]; tensor hidden_states_11_strides_0 = const()[name = tensor("hidden_states_11_strides_0"), val = tensor([1, 1])]; tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_11_dilations_0 = const()[name = tensor("hidden_states_11_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_11_groups_0 = const()[name = tensor("hidden_states_11_groups_0"), val = tensor(1)]; tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58198656)))]; tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62917312)))]; tensor hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_11_dilations_0, groups = hidden_states_11_groups_0, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = hidden_states_11_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; tensor var_3585 = const()[name = tensor("op_3585"), val = tensor(3)]; tensor var_3601 = const()[name = tensor("op_3601"), val = tensor(1)]; tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; tensor var_3618_to_fp16 = const()[name = tensor("op_3618_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_3618_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62918912)))]; tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62920512)))]; tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("valid")]; tensor query_9_strides_0 = const()[name = tensor("query_9_strides_0"), val = tensor([1, 1])]; tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_9_dilations_0 = const()[name = tensor("query_9_dilations_0"), val = tensor([1, 1])]; tensor query_9_groups_0 = const()[name = tensor("query_9_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62922112)))]; tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64101824)))]; tensor query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = query_9_dilations_0, groups = query_9_groups_0, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = query_9_strides_0, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("query_9_cast_fp16")]; tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("valid")]; tensor key_9_strides_0 = const()[name = tensor("key_9_strides_0"), val = tensor([1, 1])]; tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_9_dilations_0 = const()[name = tensor("key_9_dilations_0"), val = tensor([1, 1])]; tensor key_9_groups_0 = const()[name = tensor("key_9_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64103424)))]; tensor key_9_cast_fp16 = conv(dilations = key_9_dilations_0, groups = key_9_groups_0, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = key_9_strides_0, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("key_9_cast_fp16")]; tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("valid")]; tensor value_9_strides_0 = const()[name = tensor("value_9_strides_0"), val = tensor([1, 1])]; tensor value_9_pad_0 = const()[name = tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_9_dilations_0 = const()[name = tensor("value_9_dilations_0"), val = tensor([1, 1])]; tensor value_9_groups_0 = const()[name = tensor("value_9_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65283136)))]; tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66462848)))]; tensor value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = value_9_dilations_0, groups = value_9_groups_0, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = value_9_strides_0, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("value_9_cast_fp16")]; tensor var_3653_begin_0 = const()[name = tensor("op_3653_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3653_end_0 = const()[name = tensor("op_3653_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3653_end_mask_0 = const()[name = tensor("op_3653_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3653_cast_fp16 = slice_by_index(begin = var_3653_begin_0, end = var_3653_end_0, end_mask = var_3653_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3653_cast_fp16")]; tensor var_3657_begin_0 = const()[name = tensor("op_3657_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_3657_end_0 = const()[name = tensor("op_3657_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_3657_end_mask_0 = const()[name = tensor("op_3657_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3657_cast_fp16 = slice_by_index(begin = var_3657_begin_0, end = var_3657_end_0, end_mask = var_3657_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3657_cast_fp16")]; tensor var_3661_begin_0 = const()[name = tensor("op_3661_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_3661_end_0 = const()[name = tensor("op_3661_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_3661_end_mask_0 = const()[name = tensor("op_3661_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3661_cast_fp16 = slice_by_index(begin = var_3661_begin_0, end = var_3661_end_0, end_mask = var_3661_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3661_cast_fp16")]; tensor var_3665_begin_0 = const()[name = tensor("op_3665_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_3665_end_0 = const()[name = tensor("op_3665_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_3665_end_mask_0 = const()[name = tensor("op_3665_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3665_cast_fp16 = slice_by_index(begin = var_3665_begin_0, end = var_3665_end_0, end_mask = var_3665_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3665_cast_fp16")]; tensor var_3669_begin_0 = const()[name = tensor("op_3669_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_3669_end_0 = const()[name = tensor("op_3669_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_3669_end_mask_0 = const()[name = tensor("op_3669_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3669_cast_fp16 = slice_by_index(begin = var_3669_begin_0, end = var_3669_end_0, end_mask = var_3669_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3669_cast_fp16")]; tensor var_3673_begin_0 = const()[name = tensor("op_3673_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3673_end_0 = const()[name = tensor("op_3673_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_3673_end_mask_0 = const()[name = tensor("op_3673_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3673_cast_fp16 = slice_by_index(begin = var_3673_begin_0, end = var_3673_end_0, end_mask = var_3673_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3673_cast_fp16")]; tensor var_3677_begin_0 = const()[name = tensor("op_3677_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_3677_end_0 = const()[name = tensor("op_3677_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_3677_end_mask_0 = const()[name = tensor("op_3677_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3677_cast_fp16 = slice_by_index(begin = var_3677_begin_0, end = var_3677_end_0, end_mask = var_3677_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3677_cast_fp16")]; tensor var_3681_begin_0 = const()[name = tensor("op_3681_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_3681_end_0 = const()[name = tensor("op_3681_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_3681_end_mask_0 = const()[name = tensor("op_3681_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3681_cast_fp16 = slice_by_index(begin = var_3681_begin_0, end = var_3681_end_0, end_mask = var_3681_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3681_cast_fp16")]; tensor var_3685_begin_0 = const()[name = tensor("op_3685_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_3685_end_0 = const()[name = tensor("op_3685_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_3685_end_mask_0 = const()[name = tensor("op_3685_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3685_cast_fp16 = slice_by_index(begin = var_3685_begin_0, end = var_3685_end_0, end_mask = var_3685_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3685_cast_fp16")]; tensor var_3689_begin_0 = const()[name = tensor("op_3689_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_3689_end_0 = const()[name = tensor("op_3689_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_3689_end_mask_0 = const()[name = tensor("op_3689_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3689_cast_fp16 = slice_by_index(begin = var_3689_begin_0, end = var_3689_end_0, end_mask = var_3689_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3689_cast_fp16")]; tensor var_3693_begin_0 = const()[name = tensor("op_3693_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_3693_end_0 = const()[name = tensor("op_3693_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_3693_end_mask_0 = const()[name = tensor("op_3693_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3693_cast_fp16 = slice_by_index(begin = var_3693_begin_0, end = var_3693_end_0, end_mask = var_3693_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3693_cast_fp16")]; tensor var_3697_begin_0 = const()[name = tensor("op_3697_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_3697_end_0 = const()[name = tensor("op_3697_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_3697_end_mask_0 = const()[name = tensor("op_3697_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3697_cast_fp16 = slice_by_index(begin = var_3697_begin_0, end = var_3697_end_0, end_mask = var_3697_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_3697_cast_fp16")]; tensor var_3700_begin_0 = const()[name = tensor("op_3700_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3700_end_0 = const()[name = tensor("op_3700_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3700_end_mask_0 = const()[name = tensor("op_3700_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3700_cast_fp16 = slice_by_index(begin = var_3700_begin_0, end = var_3700_end_0, end_mask = var_3700_end_mask_0, x = var_3653_cast_fp16)[name = tensor("op_3700_cast_fp16")]; tensor var_3701_begin_0 = const()[name = tensor("op_3701_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3701_end_0 = const()[name = tensor("op_3701_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3701_end_mask_0 = const()[name = tensor("op_3701_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3701_cast_fp16 = slice_by_index(begin = var_3701_begin_0, end = var_3701_end_0, end_mask = var_3701_end_mask_0, x = var_3653_cast_fp16)[name = tensor("op_3701_cast_fp16")]; tensor var_3702_begin_0 = const()[name = tensor("op_3702_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3702_end_0 = const()[name = tensor("op_3702_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3702_end_mask_0 = const()[name = tensor("op_3702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3702_cast_fp16 = slice_by_index(begin = var_3702_begin_0, end = var_3702_end_0, end_mask = var_3702_end_mask_0, x = var_3653_cast_fp16)[name = tensor("op_3702_cast_fp16")]; tensor var_3703_begin_0 = const()[name = tensor("op_3703_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3703_end_0 = const()[name = tensor("op_3703_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3703_end_mask_0 = const()[name = tensor("op_3703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3703_cast_fp16 = slice_by_index(begin = var_3703_begin_0, end = var_3703_end_0, end_mask = var_3703_end_mask_0, x = var_3653_cast_fp16)[name = tensor("op_3703_cast_fp16")]; tensor var_3704_begin_0 = const()[name = tensor("op_3704_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3704_end_0 = const()[name = tensor("op_3704_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3704_end_mask_0 = const()[name = tensor("op_3704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3704_cast_fp16 = slice_by_index(begin = var_3704_begin_0, end = var_3704_end_0, end_mask = var_3704_end_mask_0, x = var_3653_cast_fp16)[name = tensor("op_3704_cast_fp16")]; tensor var_3705_begin_0 = const()[name = tensor("op_3705_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3705_end_0 = const()[name = tensor("op_3705_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3705_end_mask_0 = const()[name = tensor("op_3705_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3705_cast_fp16 = slice_by_index(begin = var_3705_begin_0, end = var_3705_end_0, end_mask = var_3705_end_mask_0, x = var_3653_cast_fp16)[name = tensor("op_3705_cast_fp16")]; tensor var_3706_begin_0 = const()[name = tensor("op_3706_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3706_end_0 = const()[name = tensor("op_3706_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3706_end_mask_0 = const()[name = tensor("op_3706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3706_cast_fp16 = slice_by_index(begin = var_3706_begin_0, end = var_3706_end_0, end_mask = var_3706_end_mask_0, x = var_3657_cast_fp16)[name = tensor("op_3706_cast_fp16")]; tensor var_3707_begin_0 = const()[name = tensor("op_3707_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3707_end_0 = const()[name = tensor("op_3707_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3707_end_mask_0 = const()[name = tensor("op_3707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3707_cast_fp16 = slice_by_index(begin = var_3707_begin_0, end = var_3707_end_0, end_mask = var_3707_end_mask_0, x = var_3657_cast_fp16)[name = tensor("op_3707_cast_fp16")]; tensor var_3708_begin_0 = const()[name = tensor("op_3708_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3708_end_0 = const()[name = tensor("op_3708_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3708_end_mask_0 = const()[name = tensor("op_3708_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3708_cast_fp16 = slice_by_index(begin = var_3708_begin_0, end = var_3708_end_0, end_mask = var_3708_end_mask_0, x = var_3657_cast_fp16)[name = tensor("op_3708_cast_fp16")]; tensor var_3709_begin_0 = const()[name = tensor("op_3709_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3709_end_0 = const()[name = tensor("op_3709_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3709_end_mask_0 = const()[name = tensor("op_3709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3709_cast_fp16 = slice_by_index(begin = var_3709_begin_0, end = var_3709_end_0, end_mask = var_3709_end_mask_0, x = var_3657_cast_fp16)[name = tensor("op_3709_cast_fp16")]; tensor var_3710_begin_0 = const()[name = tensor("op_3710_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3710_end_0 = const()[name = tensor("op_3710_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3710_end_mask_0 = const()[name = tensor("op_3710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3710_cast_fp16 = slice_by_index(begin = var_3710_begin_0, end = var_3710_end_0, end_mask = var_3710_end_mask_0, x = var_3657_cast_fp16)[name = tensor("op_3710_cast_fp16")]; tensor var_3711_begin_0 = const()[name = tensor("op_3711_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3711_end_0 = const()[name = tensor("op_3711_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3711_end_mask_0 = const()[name = tensor("op_3711_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3711_cast_fp16 = slice_by_index(begin = var_3711_begin_0, end = var_3711_end_0, end_mask = var_3711_end_mask_0, x = var_3657_cast_fp16)[name = tensor("op_3711_cast_fp16")]; tensor var_3712_begin_0 = const()[name = tensor("op_3712_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3712_end_0 = const()[name = tensor("op_3712_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3712_end_mask_0 = const()[name = tensor("op_3712_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3712_cast_fp16 = slice_by_index(begin = var_3712_begin_0, end = var_3712_end_0, end_mask = var_3712_end_mask_0, x = var_3661_cast_fp16)[name = tensor("op_3712_cast_fp16")]; tensor var_3713_begin_0 = const()[name = tensor("op_3713_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3713_end_0 = const()[name = tensor("op_3713_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3713_end_mask_0 = const()[name = tensor("op_3713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3713_cast_fp16 = slice_by_index(begin = var_3713_begin_0, end = var_3713_end_0, end_mask = var_3713_end_mask_0, x = var_3661_cast_fp16)[name = tensor("op_3713_cast_fp16")]; tensor var_3714_begin_0 = const()[name = tensor("op_3714_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3714_end_0 = const()[name = tensor("op_3714_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3714_end_mask_0 = const()[name = tensor("op_3714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3714_cast_fp16 = slice_by_index(begin = var_3714_begin_0, end = var_3714_end_0, end_mask = var_3714_end_mask_0, x = var_3661_cast_fp16)[name = tensor("op_3714_cast_fp16")]; tensor var_3715_begin_0 = const()[name = tensor("op_3715_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3715_end_0 = const()[name = tensor("op_3715_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3715_end_mask_0 = const()[name = tensor("op_3715_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3715_cast_fp16 = slice_by_index(begin = var_3715_begin_0, end = var_3715_end_0, end_mask = var_3715_end_mask_0, x = var_3661_cast_fp16)[name = tensor("op_3715_cast_fp16")]; tensor var_3716_begin_0 = const()[name = tensor("op_3716_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3716_end_0 = const()[name = tensor("op_3716_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3716_end_mask_0 = const()[name = tensor("op_3716_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3716_cast_fp16 = slice_by_index(begin = var_3716_begin_0, end = var_3716_end_0, end_mask = var_3716_end_mask_0, x = var_3661_cast_fp16)[name = tensor("op_3716_cast_fp16")]; tensor var_3717_begin_0 = const()[name = tensor("op_3717_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3717_end_0 = const()[name = tensor("op_3717_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3717_end_mask_0 = const()[name = tensor("op_3717_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = var_3661_cast_fp16)[name = tensor("op_3717_cast_fp16")]; tensor var_3718_begin_0 = const()[name = tensor("op_3718_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3718_end_0 = const()[name = tensor("op_3718_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3718_end_mask_0 = const()[name = tensor("op_3718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3718_cast_fp16 = slice_by_index(begin = var_3718_begin_0, end = var_3718_end_0, end_mask = var_3718_end_mask_0, x = var_3665_cast_fp16)[name = tensor("op_3718_cast_fp16")]; tensor var_3719_begin_0 = const()[name = tensor("op_3719_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3719_end_0 = const()[name = tensor("op_3719_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3719_end_mask_0 = const()[name = tensor("op_3719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3719_cast_fp16 = slice_by_index(begin = var_3719_begin_0, end = var_3719_end_0, end_mask = var_3719_end_mask_0, x = var_3665_cast_fp16)[name = tensor("op_3719_cast_fp16")]; tensor var_3720_begin_0 = const()[name = tensor("op_3720_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3720_end_0 = const()[name = tensor("op_3720_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3720_end_mask_0 = const()[name = tensor("op_3720_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3720_cast_fp16 = slice_by_index(begin = var_3720_begin_0, end = var_3720_end_0, end_mask = var_3720_end_mask_0, x = var_3665_cast_fp16)[name = tensor("op_3720_cast_fp16")]; tensor var_3721_begin_0 = const()[name = tensor("op_3721_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3721_end_0 = const()[name = tensor("op_3721_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3721_end_mask_0 = const()[name = tensor("op_3721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3721_cast_fp16 = slice_by_index(begin = var_3721_begin_0, end = var_3721_end_0, end_mask = var_3721_end_mask_0, x = var_3665_cast_fp16)[name = tensor("op_3721_cast_fp16")]; tensor var_3722_begin_0 = const()[name = tensor("op_3722_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3722_end_0 = const()[name = tensor("op_3722_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3722_end_mask_0 = const()[name = tensor("op_3722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3722_cast_fp16 = slice_by_index(begin = var_3722_begin_0, end = var_3722_end_0, end_mask = var_3722_end_mask_0, x = var_3665_cast_fp16)[name = tensor("op_3722_cast_fp16")]; tensor var_3723_begin_0 = const()[name = tensor("op_3723_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3723_end_0 = const()[name = tensor("op_3723_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3723_end_mask_0 = const()[name = tensor("op_3723_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3723_cast_fp16 = slice_by_index(begin = var_3723_begin_0, end = var_3723_end_0, end_mask = var_3723_end_mask_0, x = var_3665_cast_fp16)[name = tensor("op_3723_cast_fp16")]; tensor var_3724_begin_0 = const()[name = tensor("op_3724_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3724_end_0 = const()[name = tensor("op_3724_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3724_end_mask_0 = const()[name = tensor("op_3724_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3724_cast_fp16 = slice_by_index(begin = var_3724_begin_0, end = var_3724_end_0, end_mask = var_3724_end_mask_0, x = var_3669_cast_fp16)[name = tensor("op_3724_cast_fp16")]; tensor var_3725_begin_0 = const()[name = tensor("op_3725_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3725_end_0 = const()[name = tensor("op_3725_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3725_end_mask_0 = const()[name = tensor("op_3725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3725_cast_fp16 = slice_by_index(begin = var_3725_begin_0, end = var_3725_end_0, end_mask = var_3725_end_mask_0, x = var_3669_cast_fp16)[name = tensor("op_3725_cast_fp16")]; tensor var_3726_begin_0 = const()[name = tensor("op_3726_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3726_end_0 = const()[name = tensor("op_3726_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3726_end_mask_0 = const()[name = tensor("op_3726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3726_cast_fp16 = slice_by_index(begin = var_3726_begin_0, end = var_3726_end_0, end_mask = var_3726_end_mask_0, x = var_3669_cast_fp16)[name = tensor("op_3726_cast_fp16")]; tensor var_3727_begin_0 = const()[name = tensor("op_3727_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3727_end_0 = const()[name = tensor("op_3727_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3727_end_mask_0 = const()[name = tensor("op_3727_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3727_cast_fp16 = slice_by_index(begin = var_3727_begin_0, end = var_3727_end_0, end_mask = var_3727_end_mask_0, x = var_3669_cast_fp16)[name = tensor("op_3727_cast_fp16")]; tensor var_3728_begin_0 = const()[name = tensor("op_3728_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3728_end_0 = const()[name = tensor("op_3728_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3728_end_mask_0 = const()[name = tensor("op_3728_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3728_cast_fp16 = slice_by_index(begin = var_3728_begin_0, end = var_3728_end_0, end_mask = var_3728_end_mask_0, x = var_3669_cast_fp16)[name = tensor("op_3728_cast_fp16")]; tensor var_3729_begin_0 = const()[name = tensor("op_3729_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3729_end_0 = const()[name = tensor("op_3729_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3729_end_mask_0 = const()[name = tensor("op_3729_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3729_cast_fp16 = slice_by_index(begin = var_3729_begin_0, end = var_3729_end_0, end_mask = var_3729_end_mask_0, x = var_3669_cast_fp16)[name = tensor("op_3729_cast_fp16")]; tensor var_3730_begin_0 = const()[name = tensor("op_3730_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3730_end_0 = const()[name = tensor("op_3730_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3730_end_mask_0 = const()[name = tensor("op_3730_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3730_cast_fp16 = slice_by_index(begin = var_3730_begin_0, end = var_3730_end_0, end_mask = var_3730_end_mask_0, x = var_3673_cast_fp16)[name = tensor("op_3730_cast_fp16")]; tensor var_3731_begin_0 = const()[name = tensor("op_3731_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3731_end_0 = const()[name = tensor("op_3731_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3731_end_mask_0 = const()[name = tensor("op_3731_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3731_cast_fp16 = slice_by_index(begin = var_3731_begin_0, end = var_3731_end_0, end_mask = var_3731_end_mask_0, x = var_3673_cast_fp16)[name = tensor("op_3731_cast_fp16")]; tensor var_3732_begin_0 = const()[name = tensor("op_3732_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3732_end_0 = const()[name = tensor("op_3732_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3732_end_mask_0 = const()[name = tensor("op_3732_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3732_cast_fp16 = slice_by_index(begin = var_3732_begin_0, end = var_3732_end_0, end_mask = var_3732_end_mask_0, x = var_3673_cast_fp16)[name = tensor("op_3732_cast_fp16")]; tensor var_3733_begin_0 = const()[name = tensor("op_3733_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3733_end_0 = const()[name = tensor("op_3733_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3733_end_mask_0 = const()[name = tensor("op_3733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3733_cast_fp16 = slice_by_index(begin = var_3733_begin_0, end = var_3733_end_0, end_mask = var_3733_end_mask_0, x = var_3673_cast_fp16)[name = tensor("op_3733_cast_fp16")]; tensor var_3734_begin_0 = const()[name = tensor("op_3734_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3734_end_0 = const()[name = tensor("op_3734_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3734_end_mask_0 = const()[name = tensor("op_3734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3734_cast_fp16 = slice_by_index(begin = var_3734_begin_0, end = var_3734_end_0, end_mask = var_3734_end_mask_0, x = var_3673_cast_fp16)[name = tensor("op_3734_cast_fp16")]; tensor var_3735_begin_0 = const()[name = tensor("op_3735_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3735_end_0 = const()[name = tensor("op_3735_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3735_end_mask_0 = const()[name = tensor("op_3735_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3735_cast_fp16 = slice_by_index(begin = var_3735_begin_0, end = var_3735_end_0, end_mask = var_3735_end_mask_0, x = var_3673_cast_fp16)[name = tensor("op_3735_cast_fp16")]; tensor var_3736_begin_0 = const()[name = tensor("op_3736_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3736_end_0 = const()[name = tensor("op_3736_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3736_end_mask_0 = const()[name = tensor("op_3736_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3736_cast_fp16 = slice_by_index(begin = var_3736_begin_0, end = var_3736_end_0, end_mask = var_3736_end_mask_0, x = var_3677_cast_fp16)[name = tensor("op_3736_cast_fp16")]; tensor var_3737_begin_0 = const()[name = tensor("op_3737_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3737_end_0 = const()[name = tensor("op_3737_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3737_end_mask_0 = const()[name = tensor("op_3737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3737_cast_fp16 = slice_by_index(begin = var_3737_begin_0, end = var_3737_end_0, end_mask = var_3737_end_mask_0, x = var_3677_cast_fp16)[name = tensor("op_3737_cast_fp16")]; tensor var_3738_begin_0 = const()[name = tensor("op_3738_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3738_end_0 = const()[name = tensor("op_3738_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3738_end_mask_0 = const()[name = tensor("op_3738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3738_cast_fp16 = slice_by_index(begin = var_3738_begin_0, end = var_3738_end_0, end_mask = var_3738_end_mask_0, x = var_3677_cast_fp16)[name = tensor("op_3738_cast_fp16")]; tensor var_3739_begin_0 = const()[name = tensor("op_3739_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3739_end_0 = const()[name = tensor("op_3739_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3739_end_mask_0 = const()[name = tensor("op_3739_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3739_cast_fp16 = slice_by_index(begin = var_3739_begin_0, end = var_3739_end_0, end_mask = var_3739_end_mask_0, x = var_3677_cast_fp16)[name = tensor("op_3739_cast_fp16")]; tensor var_3740_begin_0 = const()[name = tensor("op_3740_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3740_end_0 = const()[name = tensor("op_3740_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3740_end_mask_0 = const()[name = tensor("op_3740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3740_cast_fp16 = slice_by_index(begin = var_3740_begin_0, end = var_3740_end_0, end_mask = var_3740_end_mask_0, x = var_3677_cast_fp16)[name = tensor("op_3740_cast_fp16")]; tensor var_3741_begin_0 = const()[name = tensor("op_3741_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3741_end_0 = const()[name = tensor("op_3741_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3741_end_mask_0 = const()[name = tensor("op_3741_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3741_cast_fp16 = slice_by_index(begin = var_3741_begin_0, end = var_3741_end_0, end_mask = var_3741_end_mask_0, x = var_3677_cast_fp16)[name = tensor("op_3741_cast_fp16")]; tensor var_3742_begin_0 = const()[name = tensor("op_3742_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3742_end_0 = const()[name = tensor("op_3742_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3742_end_mask_0 = const()[name = tensor("op_3742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3742_cast_fp16 = slice_by_index(begin = var_3742_begin_0, end = var_3742_end_0, end_mask = var_3742_end_mask_0, x = var_3681_cast_fp16)[name = tensor("op_3742_cast_fp16")]; tensor var_3743_begin_0 = const()[name = tensor("op_3743_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3743_end_0 = const()[name = tensor("op_3743_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3743_end_mask_0 = const()[name = tensor("op_3743_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3743_cast_fp16 = slice_by_index(begin = var_3743_begin_0, end = var_3743_end_0, end_mask = var_3743_end_mask_0, x = var_3681_cast_fp16)[name = tensor("op_3743_cast_fp16")]; tensor var_3744_begin_0 = const()[name = tensor("op_3744_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3744_end_0 = const()[name = tensor("op_3744_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3744_end_mask_0 = const()[name = tensor("op_3744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3744_cast_fp16 = slice_by_index(begin = var_3744_begin_0, end = var_3744_end_0, end_mask = var_3744_end_mask_0, x = var_3681_cast_fp16)[name = tensor("op_3744_cast_fp16")]; tensor var_3745_begin_0 = const()[name = tensor("op_3745_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3745_end_0 = const()[name = tensor("op_3745_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3745_end_mask_0 = const()[name = tensor("op_3745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3745_cast_fp16 = slice_by_index(begin = var_3745_begin_0, end = var_3745_end_0, end_mask = var_3745_end_mask_0, x = var_3681_cast_fp16)[name = tensor("op_3745_cast_fp16")]; tensor var_3746_begin_0 = const()[name = tensor("op_3746_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3746_end_0 = const()[name = tensor("op_3746_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3746_end_mask_0 = const()[name = tensor("op_3746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3746_cast_fp16 = slice_by_index(begin = var_3746_begin_0, end = var_3746_end_0, end_mask = var_3746_end_mask_0, x = var_3681_cast_fp16)[name = tensor("op_3746_cast_fp16")]; tensor var_3747_begin_0 = const()[name = tensor("op_3747_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3747_end_0 = const()[name = tensor("op_3747_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3747_end_mask_0 = const()[name = tensor("op_3747_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3747_cast_fp16 = slice_by_index(begin = var_3747_begin_0, end = var_3747_end_0, end_mask = var_3747_end_mask_0, x = var_3681_cast_fp16)[name = tensor("op_3747_cast_fp16")]; tensor var_3748_begin_0 = const()[name = tensor("op_3748_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3748_end_0 = const()[name = tensor("op_3748_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3748_end_mask_0 = const()[name = tensor("op_3748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3748_cast_fp16 = slice_by_index(begin = var_3748_begin_0, end = var_3748_end_0, end_mask = var_3748_end_mask_0, x = var_3685_cast_fp16)[name = tensor("op_3748_cast_fp16")]; tensor var_3749_begin_0 = const()[name = tensor("op_3749_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3749_end_0 = const()[name = tensor("op_3749_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3749_end_mask_0 = const()[name = tensor("op_3749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3749_cast_fp16 = slice_by_index(begin = var_3749_begin_0, end = var_3749_end_0, end_mask = var_3749_end_mask_0, x = var_3685_cast_fp16)[name = tensor("op_3749_cast_fp16")]; tensor var_3750_begin_0 = const()[name = tensor("op_3750_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3750_end_0 = const()[name = tensor("op_3750_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3750_end_mask_0 = const()[name = tensor("op_3750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3750_cast_fp16 = slice_by_index(begin = var_3750_begin_0, end = var_3750_end_0, end_mask = var_3750_end_mask_0, x = var_3685_cast_fp16)[name = tensor("op_3750_cast_fp16")]; tensor var_3751_begin_0 = const()[name = tensor("op_3751_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3751_end_0 = const()[name = tensor("op_3751_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3751_end_mask_0 = const()[name = tensor("op_3751_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3751_cast_fp16 = slice_by_index(begin = var_3751_begin_0, end = var_3751_end_0, end_mask = var_3751_end_mask_0, x = var_3685_cast_fp16)[name = tensor("op_3751_cast_fp16")]; tensor var_3752_begin_0 = const()[name = tensor("op_3752_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3752_end_0 = const()[name = tensor("op_3752_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3752_end_mask_0 = const()[name = tensor("op_3752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3752_cast_fp16 = slice_by_index(begin = var_3752_begin_0, end = var_3752_end_0, end_mask = var_3752_end_mask_0, x = var_3685_cast_fp16)[name = tensor("op_3752_cast_fp16")]; tensor var_3753_begin_0 = const()[name = tensor("op_3753_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3753_end_0 = const()[name = tensor("op_3753_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3753_end_mask_0 = const()[name = tensor("op_3753_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3753_cast_fp16 = slice_by_index(begin = var_3753_begin_0, end = var_3753_end_0, end_mask = var_3753_end_mask_0, x = var_3685_cast_fp16)[name = tensor("op_3753_cast_fp16")]; tensor var_3754_begin_0 = const()[name = tensor("op_3754_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3754_end_0 = const()[name = tensor("op_3754_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3754_end_mask_0 = const()[name = tensor("op_3754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3754_cast_fp16 = slice_by_index(begin = var_3754_begin_0, end = var_3754_end_0, end_mask = var_3754_end_mask_0, x = var_3689_cast_fp16)[name = tensor("op_3754_cast_fp16")]; tensor var_3755_begin_0 = const()[name = tensor("op_3755_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3755_end_0 = const()[name = tensor("op_3755_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3755_end_mask_0 = const()[name = tensor("op_3755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3755_cast_fp16 = slice_by_index(begin = var_3755_begin_0, end = var_3755_end_0, end_mask = var_3755_end_mask_0, x = var_3689_cast_fp16)[name = tensor("op_3755_cast_fp16")]; tensor var_3756_begin_0 = const()[name = tensor("op_3756_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3756_end_0 = const()[name = tensor("op_3756_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3756_end_mask_0 = const()[name = tensor("op_3756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3756_cast_fp16 = slice_by_index(begin = var_3756_begin_0, end = var_3756_end_0, end_mask = var_3756_end_mask_0, x = var_3689_cast_fp16)[name = tensor("op_3756_cast_fp16")]; tensor var_3757_begin_0 = const()[name = tensor("op_3757_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3757_end_0 = const()[name = tensor("op_3757_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3757_end_mask_0 = const()[name = tensor("op_3757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3757_cast_fp16 = slice_by_index(begin = var_3757_begin_0, end = var_3757_end_0, end_mask = var_3757_end_mask_0, x = var_3689_cast_fp16)[name = tensor("op_3757_cast_fp16")]; tensor var_3758_begin_0 = const()[name = tensor("op_3758_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3758_end_0 = const()[name = tensor("op_3758_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3758_end_mask_0 = const()[name = tensor("op_3758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3758_cast_fp16 = slice_by_index(begin = var_3758_begin_0, end = var_3758_end_0, end_mask = var_3758_end_mask_0, x = var_3689_cast_fp16)[name = tensor("op_3758_cast_fp16")]; tensor var_3759_begin_0 = const()[name = tensor("op_3759_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3759_end_0 = const()[name = tensor("op_3759_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3759_end_mask_0 = const()[name = tensor("op_3759_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3759_cast_fp16 = slice_by_index(begin = var_3759_begin_0, end = var_3759_end_0, end_mask = var_3759_end_mask_0, x = var_3689_cast_fp16)[name = tensor("op_3759_cast_fp16")]; tensor var_3760_begin_0 = const()[name = tensor("op_3760_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3760_end_0 = const()[name = tensor("op_3760_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3760_end_mask_0 = const()[name = tensor("op_3760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3760_cast_fp16 = slice_by_index(begin = var_3760_begin_0, end = var_3760_end_0, end_mask = var_3760_end_mask_0, x = var_3693_cast_fp16)[name = tensor("op_3760_cast_fp16")]; tensor var_3761_begin_0 = const()[name = tensor("op_3761_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3761_end_0 = const()[name = tensor("op_3761_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3761_end_mask_0 = const()[name = tensor("op_3761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3761_cast_fp16 = slice_by_index(begin = var_3761_begin_0, end = var_3761_end_0, end_mask = var_3761_end_mask_0, x = var_3693_cast_fp16)[name = tensor("op_3761_cast_fp16")]; tensor var_3762_begin_0 = const()[name = tensor("op_3762_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3762_end_0 = const()[name = tensor("op_3762_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3762_end_mask_0 = const()[name = tensor("op_3762_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3762_cast_fp16 = slice_by_index(begin = var_3762_begin_0, end = var_3762_end_0, end_mask = var_3762_end_mask_0, x = var_3693_cast_fp16)[name = tensor("op_3762_cast_fp16")]; tensor var_3763_begin_0 = const()[name = tensor("op_3763_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3763_end_0 = const()[name = tensor("op_3763_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3763_end_mask_0 = const()[name = tensor("op_3763_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3763_cast_fp16 = slice_by_index(begin = var_3763_begin_0, end = var_3763_end_0, end_mask = var_3763_end_mask_0, x = var_3693_cast_fp16)[name = tensor("op_3763_cast_fp16")]; tensor var_3764_begin_0 = const()[name = tensor("op_3764_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3764_end_0 = const()[name = tensor("op_3764_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3764_end_mask_0 = const()[name = tensor("op_3764_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3764_cast_fp16 = slice_by_index(begin = var_3764_begin_0, end = var_3764_end_0, end_mask = var_3764_end_mask_0, x = var_3693_cast_fp16)[name = tensor("op_3764_cast_fp16")]; tensor var_3765_begin_0 = const()[name = tensor("op_3765_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3765_end_0 = const()[name = tensor("op_3765_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3765_end_mask_0 = const()[name = tensor("op_3765_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3765_cast_fp16 = slice_by_index(begin = var_3765_begin_0, end = var_3765_end_0, end_mask = var_3765_end_mask_0, x = var_3693_cast_fp16)[name = tensor("op_3765_cast_fp16")]; tensor var_3766_begin_0 = const()[name = tensor("op_3766_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3766_end_0 = const()[name = tensor("op_3766_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_3766_end_mask_0 = const()[name = tensor("op_3766_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3766_cast_fp16 = slice_by_index(begin = var_3766_begin_0, end = var_3766_end_0, end_mask = var_3766_end_mask_0, x = var_3697_cast_fp16)[name = tensor("op_3766_cast_fp16")]; tensor var_3767_begin_0 = const()[name = tensor("op_3767_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3767_end_0 = const()[name = tensor("op_3767_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_3767_end_mask_0 = const()[name = tensor("op_3767_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3767_cast_fp16 = slice_by_index(begin = var_3767_begin_0, end = var_3767_end_0, end_mask = var_3767_end_mask_0, x = var_3697_cast_fp16)[name = tensor("op_3767_cast_fp16")]; tensor var_3768_begin_0 = const()[name = tensor("op_3768_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3768_end_0 = const()[name = tensor("op_3768_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_3768_end_mask_0 = const()[name = tensor("op_3768_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3768_cast_fp16 = slice_by_index(begin = var_3768_begin_0, end = var_3768_end_0, end_mask = var_3768_end_mask_0, x = var_3697_cast_fp16)[name = tensor("op_3768_cast_fp16")]; tensor var_3769_begin_0 = const()[name = tensor("op_3769_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_3769_end_0 = const()[name = tensor("op_3769_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_3769_end_mask_0 = const()[name = tensor("op_3769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3769_cast_fp16 = slice_by_index(begin = var_3769_begin_0, end = var_3769_end_0, end_mask = var_3769_end_mask_0, x = var_3697_cast_fp16)[name = tensor("op_3769_cast_fp16")]; tensor var_3770_begin_0 = const()[name = tensor("op_3770_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_3770_end_0 = const()[name = tensor("op_3770_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_3770_end_mask_0 = const()[name = tensor("op_3770_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3770_cast_fp16 = slice_by_index(begin = var_3770_begin_0, end = var_3770_end_0, end_mask = var_3770_end_mask_0, x = var_3697_cast_fp16)[name = tensor("op_3770_cast_fp16")]; tensor var_3771_begin_0 = const()[name = tensor("op_3771_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_3771_end_0 = const()[name = tensor("op_3771_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_3771_end_mask_0 = const()[name = tensor("op_3771_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3771_cast_fp16 = slice_by_index(begin = var_3771_begin_0, end = var_3771_end_0, end_mask = var_3771_end_mask_0, x = var_3697_cast_fp16)[name = tensor("op_3771_cast_fp16")]; tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_3776_begin_0 = const()[name = tensor("op_3776_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3776_end_0 = const()[name = tensor("op_3776_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_3776_end_mask_0 = const()[name = tensor("op_3776_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor("transpose_7")]; tensor var_3776_cast_fp16 = slice_by_index(begin = var_3776_begin_0, end = var_3776_end_0, end_mask = var_3776_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3776_cast_fp16")]; tensor var_3780_begin_0 = const()[name = tensor("op_3780_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_3780_end_0 = const()[name = tensor("op_3780_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_3780_end_mask_0 = const()[name = tensor("op_3780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3780_cast_fp16 = slice_by_index(begin = var_3780_begin_0, end = var_3780_end_0, end_mask = var_3780_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3780_cast_fp16")]; tensor var_3784_begin_0 = const()[name = tensor("op_3784_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_3784_end_0 = const()[name = tensor("op_3784_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_3784_end_mask_0 = const()[name = tensor("op_3784_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3784_cast_fp16 = slice_by_index(begin = var_3784_begin_0, end = var_3784_end_0, end_mask = var_3784_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3784_cast_fp16")]; tensor var_3788_begin_0 = const()[name = tensor("op_3788_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_3788_end_0 = const()[name = tensor("op_3788_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_3788_end_mask_0 = const()[name = tensor("op_3788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3788_cast_fp16 = slice_by_index(begin = var_3788_begin_0, end = var_3788_end_0, end_mask = var_3788_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3788_cast_fp16")]; tensor var_3792_begin_0 = const()[name = tensor("op_3792_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_3792_end_0 = const()[name = tensor("op_3792_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_3792_end_mask_0 = const()[name = tensor("op_3792_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3792_cast_fp16 = slice_by_index(begin = var_3792_begin_0, end = var_3792_end_0, end_mask = var_3792_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3792_cast_fp16")]; tensor var_3796_begin_0 = const()[name = tensor("op_3796_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_3796_end_0 = const()[name = tensor("op_3796_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_3796_end_mask_0 = const()[name = tensor("op_3796_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3796_cast_fp16 = slice_by_index(begin = var_3796_begin_0, end = var_3796_end_0, end_mask = var_3796_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3796_cast_fp16")]; tensor var_3800_begin_0 = const()[name = tensor("op_3800_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_3800_end_0 = const()[name = tensor("op_3800_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_3800_end_mask_0 = const()[name = tensor("op_3800_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3800_cast_fp16 = slice_by_index(begin = var_3800_begin_0, end = var_3800_end_0, end_mask = var_3800_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3800_cast_fp16")]; tensor var_3804_begin_0 = const()[name = tensor("op_3804_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_3804_end_0 = const()[name = tensor("op_3804_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_3804_end_mask_0 = const()[name = tensor("op_3804_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3804_cast_fp16 = slice_by_index(begin = var_3804_begin_0, end = var_3804_end_0, end_mask = var_3804_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3804_cast_fp16")]; tensor var_3808_begin_0 = const()[name = tensor("op_3808_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3808_end_0 = const()[name = tensor("op_3808_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_3808_end_mask_0 = const()[name = tensor("op_3808_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3808_cast_fp16 = slice_by_index(begin = var_3808_begin_0, end = var_3808_end_0, end_mask = var_3808_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3808_cast_fp16")]; tensor var_3812_begin_0 = const()[name = tensor("op_3812_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_3812_end_0 = const()[name = tensor("op_3812_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_3812_end_mask_0 = const()[name = tensor("op_3812_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3812_cast_fp16 = slice_by_index(begin = var_3812_begin_0, end = var_3812_end_0, end_mask = var_3812_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3812_cast_fp16")]; tensor var_3816_begin_0 = const()[name = tensor("op_3816_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_3816_end_0 = const()[name = tensor("op_3816_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_3816_end_mask_0 = const()[name = tensor("op_3816_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3816_cast_fp16 = slice_by_index(begin = var_3816_begin_0, end = var_3816_end_0, end_mask = var_3816_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3816_cast_fp16")]; tensor var_3820_begin_0 = const()[name = tensor("op_3820_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_3820_end_0 = const()[name = tensor("op_3820_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_3820_end_mask_0 = const()[name = tensor("op_3820_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3820_cast_fp16 = slice_by_index(begin = var_3820_begin_0, end = var_3820_end_0, end_mask = var_3820_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_3820_cast_fp16")]; tensor var_3822_begin_0 = const()[name = tensor("op_3822_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3822_end_0 = const()[name = tensor("op_3822_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3822_end_mask_0 = const()[name = tensor("op_3822_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3822_cast_fp16 = slice_by_index(begin = var_3822_begin_0, end = var_3822_end_0, end_mask = var_3822_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3822_cast_fp16")]; tensor var_3826_begin_0 = const()[name = tensor("op_3826_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_3826_end_0 = const()[name = tensor("op_3826_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_3826_end_mask_0 = const()[name = tensor("op_3826_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3826_cast_fp16 = slice_by_index(begin = var_3826_begin_0, end = var_3826_end_0, end_mask = var_3826_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3826_cast_fp16")]; tensor var_3830_begin_0 = const()[name = tensor("op_3830_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_3830_end_0 = const()[name = tensor("op_3830_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_3830_end_mask_0 = const()[name = tensor("op_3830_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3830_cast_fp16 = slice_by_index(begin = var_3830_begin_0, end = var_3830_end_0, end_mask = var_3830_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3830_cast_fp16")]; tensor var_3834_begin_0 = const()[name = tensor("op_3834_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_3834_end_0 = const()[name = tensor("op_3834_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_3834_end_mask_0 = const()[name = tensor("op_3834_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3834_cast_fp16 = slice_by_index(begin = var_3834_begin_0, end = var_3834_end_0, end_mask = var_3834_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3834_cast_fp16")]; tensor var_3838_begin_0 = const()[name = tensor("op_3838_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_3838_end_0 = const()[name = tensor("op_3838_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_3838_end_mask_0 = const()[name = tensor("op_3838_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3838_cast_fp16 = slice_by_index(begin = var_3838_begin_0, end = var_3838_end_0, end_mask = var_3838_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3838_cast_fp16")]; tensor var_3842_begin_0 = const()[name = tensor("op_3842_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3842_end_0 = const()[name = tensor("op_3842_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_3842_end_mask_0 = const()[name = tensor("op_3842_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3842_cast_fp16 = slice_by_index(begin = var_3842_begin_0, end = var_3842_end_0, end_mask = var_3842_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3842_cast_fp16")]; tensor var_3846_begin_0 = const()[name = tensor("op_3846_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_3846_end_0 = const()[name = tensor("op_3846_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_3846_end_mask_0 = const()[name = tensor("op_3846_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3846_cast_fp16 = slice_by_index(begin = var_3846_begin_0, end = var_3846_end_0, end_mask = var_3846_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3846_cast_fp16")]; tensor var_3850_begin_0 = const()[name = tensor("op_3850_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_3850_end_0 = const()[name = tensor("op_3850_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_3850_end_mask_0 = const()[name = tensor("op_3850_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3850_cast_fp16 = slice_by_index(begin = var_3850_begin_0, end = var_3850_end_0, end_mask = var_3850_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3850_cast_fp16")]; tensor var_3854_begin_0 = const()[name = tensor("op_3854_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_3854_end_0 = const()[name = tensor("op_3854_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_3854_end_mask_0 = const()[name = tensor("op_3854_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3854_cast_fp16 = slice_by_index(begin = var_3854_begin_0, end = var_3854_end_0, end_mask = var_3854_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3854_cast_fp16")]; tensor var_3858_begin_0 = const()[name = tensor("op_3858_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_3858_end_0 = const()[name = tensor("op_3858_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_3858_end_mask_0 = const()[name = tensor("op_3858_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3858_cast_fp16 = slice_by_index(begin = var_3858_begin_0, end = var_3858_end_0, end_mask = var_3858_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3858_cast_fp16")]; tensor var_3862_begin_0 = const()[name = tensor("op_3862_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_3862_end_0 = const()[name = tensor("op_3862_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_3862_end_mask_0 = const()[name = tensor("op_3862_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3862_cast_fp16 = slice_by_index(begin = var_3862_begin_0, end = var_3862_end_0, end_mask = var_3862_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3862_cast_fp16")]; tensor var_3866_begin_0 = const()[name = tensor("op_3866_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_3866_end_0 = const()[name = tensor("op_3866_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_3866_end_mask_0 = const()[name = tensor("op_3866_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_3866_cast_fp16 = slice_by_index(begin = var_3866_begin_0, end = var_3866_end_0, end_mask = var_3866_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_3866_cast_fp16")]; tensor _SplitHeadsQ__mh_w_577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_577_equation_0, values = (var_3776_cast_fp16, var_3700_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_579_equation_0, values = (var_3776_cast_fp16, var_3701_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_581_equation_0, values = (var_3776_cast_fp16, var_3702_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_583_equation_0, values = (var_3776_cast_fp16, var_3703_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_583_cast_fp16")]; tensor _SplitHeadsQ__mh_w_585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_585_equation_0, values = (var_3776_cast_fp16, var_3704_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_587_equation_0, values = (var_3776_cast_fp16, var_3705_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_589_equation_0, values = (var_3780_cast_fp16, var_3706_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_591_equation_0, values = (var_3780_cast_fp16, var_3707_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_593_equation_0, values = (var_3780_cast_fp16, var_3708_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_595_equation_0, values = (var_3780_cast_fp16, var_3709_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_597_equation_0, values = (var_3780_cast_fp16, var_3710_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_599_equation_0, values = (var_3780_cast_fp16, var_3711_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_599_cast_fp16")]; tensor _SplitHeadsQ__mh_w_601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_601_equation_0, values = (var_3784_cast_fp16, var_3712_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_603_equation_0, values = (var_3784_cast_fp16, var_3713_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_605_equation_0, values = (var_3784_cast_fp16, var_3714_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_607_equation_0, values = (var_3784_cast_fp16, var_3715_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_609_equation_0, values = (var_3784_cast_fp16, var_3716_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_611_equation_0, values = (var_3784_cast_fp16, var_3717_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_613_equation_0, values = (var_3788_cast_fp16, var_3718_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_615_equation_0, values = (var_3788_cast_fp16, var_3719_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_617_equation_0, values = (var_3788_cast_fp16, var_3720_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_619_equation_0, values = (var_3788_cast_fp16, var_3721_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_621_equation_0, values = (var_3788_cast_fp16, var_3722_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_623_equation_0, values = (var_3788_cast_fp16, var_3723_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_625_equation_0, values = (var_3792_cast_fp16, var_3724_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_627_equation_0, values = (var_3792_cast_fp16, var_3725_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_629_equation_0, values = (var_3792_cast_fp16, var_3726_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_631_equation_0, values = (var_3792_cast_fp16, var_3727_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_633_equation_0, values = (var_3792_cast_fp16, var_3728_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_635_equation_0, values = (var_3792_cast_fp16, var_3729_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_637_equation_0, values = (var_3796_cast_fp16, var_3730_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_639_equation_0, values = (var_3796_cast_fp16, var_3731_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_639_cast_fp16")]; tensor _SplitHeadsQ__mh_w_641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_641_equation_0, values = (var_3796_cast_fp16, var_3732_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_643_equation_0, values = (var_3796_cast_fp16, var_3733_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_645_equation_0, values = (var_3796_cast_fp16, var_3734_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_647_equation_0, values = (var_3796_cast_fp16, var_3735_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_649_equation_0, values = (var_3800_cast_fp16, var_3736_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_651_equation_0, values = (var_3800_cast_fp16, var_3737_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_653_equation_0, values = (var_3800_cast_fp16, var_3738_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_655_equation_0, values = (var_3800_cast_fp16, var_3739_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_657_equation_0, values = (var_3800_cast_fp16, var_3740_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_659_equation_0, values = (var_3800_cast_fp16, var_3741_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_661_equation_0, values = (var_3804_cast_fp16, var_3742_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_663_equation_0, values = (var_3804_cast_fp16, var_3743_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_665_equation_0, values = (var_3804_cast_fp16, var_3744_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_667_equation_0, values = (var_3804_cast_fp16, var_3745_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_669_equation_0, values = (var_3804_cast_fp16, var_3746_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_671_equation_0, values = (var_3804_cast_fp16, var_3747_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_673_equation_0, values = (var_3808_cast_fp16, var_3748_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_675_equation_0, values = (var_3808_cast_fp16, var_3749_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_677_equation_0, values = (var_3808_cast_fp16, var_3750_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_679_equation_0, values = (var_3808_cast_fp16, var_3751_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_679_cast_fp16")]; tensor _SplitHeadsQ__mh_w_681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_681_equation_0, values = (var_3808_cast_fp16, var_3752_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_683_equation_0, values = (var_3808_cast_fp16, var_3753_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_685_equation_0, values = (var_3812_cast_fp16, var_3754_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_687_equation_0, values = (var_3812_cast_fp16, var_3755_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_689_equation_0, values = (var_3812_cast_fp16, var_3756_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_691_equation_0, values = (var_3812_cast_fp16, var_3757_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_693_equation_0, values = (var_3812_cast_fp16, var_3758_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_695_equation_0, values = (var_3812_cast_fp16, var_3759_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_697_equation_0, values = (var_3816_cast_fp16, var_3760_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_699_equation_0, values = (var_3816_cast_fp16, var_3761_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_701_equation_0, values = (var_3816_cast_fp16, var_3762_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_703_equation_0, values = (var_3816_cast_fp16, var_3763_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_705_equation_0, values = (var_3816_cast_fp16, var_3764_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_707_equation_0, values = (var_3816_cast_fp16, var_3765_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_709_equation_0, values = (var_3820_cast_fp16, var_3766_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_711_equation_0, values = (var_3820_cast_fp16, var_3767_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_713_equation_0, values = (var_3820_cast_fp16, var_3768_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_715_equation_0, values = (var_3820_cast_fp16, var_3769_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_717_equation_0, values = (var_3820_cast_fp16, var_3770_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_719_equation_0, values = (var_3820_cast_fp16, var_3771_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_719_cast_fp16")]; tensor var_4013_to_fp16 = const()[name = tensor("op_4013_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_577_cast_fp16, y = var_4013_to_fp16)[name = tensor("aw_chunk_577_cast_fp16")]; tensor var_4015_to_fp16 = const()[name = tensor("op_4015_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_579_cast_fp16, y = var_4015_to_fp16)[name = tensor("aw_chunk_579_cast_fp16")]; tensor var_4017_to_fp16 = const()[name = tensor("op_4017_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_581_cast_fp16, y = var_4017_to_fp16)[name = tensor("aw_chunk_581_cast_fp16")]; tensor var_4019_to_fp16 = const()[name = tensor("op_4019_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_583_cast_fp16, y = var_4019_to_fp16)[name = tensor("aw_chunk_583_cast_fp16")]; tensor var_4021_to_fp16 = const()[name = tensor("op_4021_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_585_cast_fp16, y = var_4021_to_fp16)[name = tensor("aw_chunk_585_cast_fp16")]; tensor var_4023_to_fp16 = const()[name = tensor("op_4023_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_587_cast_fp16, y = var_4023_to_fp16)[name = tensor("aw_chunk_587_cast_fp16")]; tensor var_4025_to_fp16 = const()[name = tensor("op_4025_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_589_cast_fp16, y = var_4025_to_fp16)[name = tensor("aw_chunk_589_cast_fp16")]; tensor var_4027_to_fp16 = const()[name = tensor("op_4027_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_591_cast_fp16, y = var_4027_to_fp16)[name = tensor("aw_chunk_591_cast_fp16")]; tensor var_4029_to_fp16 = const()[name = tensor("op_4029_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_593_cast_fp16, y = var_4029_to_fp16)[name = tensor("aw_chunk_593_cast_fp16")]; tensor var_4031_to_fp16 = const()[name = tensor("op_4031_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_595_cast_fp16, y = var_4031_to_fp16)[name = tensor("aw_chunk_595_cast_fp16")]; tensor var_4033_to_fp16 = const()[name = tensor("op_4033_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_597_cast_fp16, y = var_4033_to_fp16)[name = tensor("aw_chunk_597_cast_fp16")]; tensor var_4035_to_fp16 = const()[name = tensor("op_4035_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_599_cast_fp16, y = var_4035_to_fp16)[name = tensor("aw_chunk_599_cast_fp16")]; tensor var_4037_to_fp16 = const()[name = tensor("op_4037_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_601_cast_fp16, y = var_4037_to_fp16)[name = tensor("aw_chunk_601_cast_fp16")]; tensor var_4039_to_fp16 = const()[name = tensor("op_4039_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_603_cast_fp16, y = var_4039_to_fp16)[name = tensor("aw_chunk_603_cast_fp16")]; tensor var_4041_to_fp16 = const()[name = tensor("op_4041_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_605_cast_fp16, y = var_4041_to_fp16)[name = tensor("aw_chunk_605_cast_fp16")]; tensor var_4043_to_fp16 = const()[name = tensor("op_4043_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_607_cast_fp16, y = var_4043_to_fp16)[name = tensor("aw_chunk_607_cast_fp16")]; tensor var_4045_to_fp16 = const()[name = tensor("op_4045_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_609_cast_fp16, y = var_4045_to_fp16)[name = tensor("aw_chunk_609_cast_fp16")]; tensor var_4047_to_fp16 = const()[name = tensor("op_4047_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_611_cast_fp16, y = var_4047_to_fp16)[name = tensor("aw_chunk_611_cast_fp16")]; tensor var_4049_to_fp16 = const()[name = tensor("op_4049_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_613_cast_fp16, y = var_4049_to_fp16)[name = tensor("aw_chunk_613_cast_fp16")]; tensor var_4051_to_fp16 = const()[name = tensor("op_4051_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_615_cast_fp16, y = var_4051_to_fp16)[name = tensor("aw_chunk_615_cast_fp16")]; tensor var_4053_to_fp16 = const()[name = tensor("op_4053_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_617_cast_fp16, y = var_4053_to_fp16)[name = tensor("aw_chunk_617_cast_fp16")]; tensor var_4055_to_fp16 = const()[name = tensor("op_4055_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_619_cast_fp16, y = var_4055_to_fp16)[name = tensor("aw_chunk_619_cast_fp16")]; tensor var_4057_to_fp16 = const()[name = tensor("op_4057_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_621_cast_fp16, y = var_4057_to_fp16)[name = tensor("aw_chunk_621_cast_fp16")]; tensor var_4059_to_fp16 = const()[name = tensor("op_4059_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_623_cast_fp16, y = var_4059_to_fp16)[name = tensor("aw_chunk_623_cast_fp16")]; tensor var_4061_to_fp16 = const()[name = tensor("op_4061_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_625_cast_fp16, y = var_4061_to_fp16)[name = tensor("aw_chunk_625_cast_fp16")]; tensor var_4063_to_fp16 = const()[name = tensor("op_4063_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_627_cast_fp16, y = var_4063_to_fp16)[name = tensor("aw_chunk_627_cast_fp16")]; tensor var_4065_to_fp16 = const()[name = tensor("op_4065_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_629_cast_fp16, y = var_4065_to_fp16)[name = tensor("aw_chunk_629_cast_fp16")]; tensor var_4067_to_fp16 = const()[name = tensor("op_4067_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_631_cast_fp16, y = var_4067_to_fp16)[name = tensor("aw_chunk_631_cast_fp16")]; tensor var_4069_to_fp16 = const()[name = tensor("op_4069_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_633_cast_fp16, y = var_4069_to_fp16)[name = tensor("aw_chunk_633_cast_fp16")]; tensor var_4071_to_fp16 = const()[name = tensor("op_4071_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_635_cast_fp16, y = var_4071_to_fp16)[name = tensor("aw_chunk_635_cast_fp16")]; tensor var_4073_to_fp16 = const()[name = tensor("op_4073_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_637_cast_fp16, y = var_4073_to_fp16)[name = tensor("aw_chunk_637_cast_fp16")]; tensor var_4075_to_fp16 = const()[name = tensor("op_4075_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_639_cast_fp16, y = var_4075_to_fp16)[name = tensor("aw_chunk_639_cast_fp16")]; tensor var_4077_to_fp16 = const()[name = tensor("op_4077_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_641_cast_fp16, y = var_4077_to_fp16)[name = tensor("aw_chunk_641_cast_fp16")]; tensor var_4079_to_fp16 = const()[name = tensor("op_4079_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_643_cast_fp16, y = var_4079_to_fp16)[name = tensor("aw_chunk_643_cast_fp16")]; tensor var_4081_to_fp16 = const()[name = tensor("op_4081_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_645_cast_fp16, y = var_4081_to_fp16)[name = tensor("aw_chunk_645_cast_fp16")]; tensor var_4083_to_fp16 = const()[name = tensor("op_4083_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_647_cast_fp16, y = var_4083_to_fp16)[name = tensor("aw_chunk_647_cast_fp16")]; tensor var_4085_to_fp16 = const()[name = tensor("op_4085_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_649_cast_fp16, y = var_4085_to_fp16)[name = tensor("aw_chunk_649_cast_fp16")]; tensor var_4087_to_fp16 = const()[name = tensor("op_4087_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_651_cast_fp16, y = var_4087_to_fp16)[name = tensor("aw_chunk_651_cast_fp16")]; tensor var_4089_to_fp16 = const()[name = tensor("op_4089_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_653_cast_fp16, y = var_4089_to_fp16)[name = tensor("aw_chunk_653_cast_fp16")]; tensor var_4091_to_fp16 = const()[name = tensor("op_4091_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_655_cast_fp16, y = var_4091_to_fp16)[name = tensor("aw_chunk_655_cast_fp16")]; tensor var_4093_to_fp16 = const()[name = tensor("op_4093_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_657_cast_fp16, y = var_4093_to_fp16)[name = tensor("aw_chunk_657_cast_fp16")]; tensor var_4095_to_fp16 = const()[name = tensor("op_4095_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_659_cast_fp16, y = var_4095_to_fp16)[name = tensor("aw_chunk_659_cast_fp16")]; tensor var_4097_to_fp16 = const()[name = tensor("op_4097_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_661_cast_fp16, y = var_4097_to_fp16)[name = tensor("aw_chunk_661_cast_fp16")]; tensor var_4099_to_fp16 = const()[name = tensor("op_4099_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_663_cast_fp16, y = var_4099_to_fp16)[name = tensor("aw_chunk_663_cast_fp16")]; tensor var_4101_to_fp16 = const()[name = tensor("op_4101_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_665_cast_fp16, y = var_4101_to_fp16)[name = tensor("aw_chunk_665_cast_fp16")]; tensor var_4103_to_fp16 = const()[name = tensor("op_4103_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_667_cast_fp16, y = var_4103_to_fp16)[name = tensor("aw_chunk_667_cast_fp16")]; tensor var_4105_to_fp16 = const()[name = tensor("op_4105_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_669_cast_fp16, y = var_4105_to_fp16)[name = tensor("aw_chunk_669_cast_fp16")]; tensor var_4107_to_fp16 = const()[name = tensor("op_4107_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_671_cast_fp16, y = var_4107_to_fp16)[name = tensor("aw_chunk_671_cast_fp16")]; tensor var_4109_to_fp16 = const()[name = tensor("op_4109_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_673_cast_fp16, y = var_4109_to_fp16)[name = tensor("aw_chunk_673_cast_fp16")]; tensor var_4111_to_fp16 = const()[name = tensor("op_4111_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_675_cast_fp16, y = var_4111_to_fp16)[name = tensor("aw_chunk_675_cast_fp16")]; tensor var_4113_to_fp16 = const()[name = tensor("op_4113_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_677_cast_fp16, y = var_4113_to_fp16)[name = tensor("aw_chunk_677_cast_fp16")]; tensor var_4115_to_fp16 = const()[name = tensor("op_4115_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_679_cast_fp16, y = var_4115_to_fp16)[name = tensor("aw_chunk_679_cast_fp16")]; tensor var_4117_to_fp16 = const()[name = tensor("op_4117_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_681_cast_fp16, y = var_4117_to_fp16)[name = tensor("aw_chunk_681_cast_fp16")]; tensor var_4119_to_fp16 = const()[name = tensor("op_4119_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_683_cast_fp16, y = var_4119_to_fp16)[name = tensor("aw_chunk_683_cast_fp16")]; tensor var_4121_to_fp16 = const()[name = tensor("op_4121_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_685_cast_fp16, y = var_4121_to_fp16)[name = tensor("aw_chunk_685_cast_fp16")]; tensor var_4123_to_fp16 = const()[name = tensor("op_4123_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_687_cast_fp16, y = var_4123_to_fp16)[name = tensor("aw_chunk_687_cast_fp16")]; tensor var_4125_to_fp16 = const()[name = tensor("op_4125_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_689_cast_fp16, y = var_4125_to_fp16)[name = tensor("aw_chunk_689_cast_fp16")]; tensor var_4127_to_fp16 = const()[name = tensor("op_4127_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_691_cast_fp16, y = var_4127_to_fp16)[name = tensor("aw_chunk_691_cast_fp16")]; tensor var_4129_to_fp16 = const()[name = tensor("op_4129_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_693_cast_fp16, y = var_4129_to_fp16)[name = tensor("aw_chunk_693_cast_fp16")]; tensor var_4131_to_fp16 = const()[name = tensor("op_4131_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_695_cast_fp16, y = var_4131_to_fp16)[name = tensor("aw_chunk_695_cast_fp16")]; tensor var_4133_to_fp16 = const()[name = tensor("op_4133_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_697_cast_fp16, y = var_4133_to_fp16)[name = tensor("aw_chunk_697_cast_fp16")]; tensor var_4135_to_fp16 = const()[name = tensor("op_4135_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_699_cast_fp16, y = var_4135_to_fp16)[name = tensor("aw_chunk_699_cast_fp16")]; tensor var_4137_to_fp16 = const()[name = tensor("op_4137_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_701_cast_fp16, y = var_4137_to_fp16)[name = tensor("aw_chunk_701_cast_fp16")]; tensor var_4139_to_fp16 = const()[name = tensor("op_4139_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_703_cast_fp16, y = var_4139_to_fp16)[name = tensor("aw_chunk_703_cast_fp16")]; tensor var_4141_to_fp16 = const()[name = tensor("op_4141_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_705_cast_fp16, y = var_4141_to_fp16)[name = tensor("aw_chunk_705_cast_fp16")]; tensor var_4143_to_fp16 = const()[name = tensor("op_4143_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_707_cast_fp16, y = var_4143_to_fp16)[name = tensor("aw_chunk_707_cast_fp16")]; tensor var_4145_to_fp16 = const()[name = tensor("op_4145_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_709_cast_fp16, y = var_4145_to_fp16)[name = tensor("aw_chunk_709_cast_fp16")]; tensor var_4147_to_fp16 = const()[name = tensor("op_4147_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_711_cast_fp16, y = var_4147_to_fp16)[name = tensor("aw_chunk_711_cast_fp16")]; tensor var_4149_to_fp16 = const()[name = tensor("op_4149_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_713_cast_fp16, y = var_4149_to_fp16)[name = tensor("aw_chunk_713_cast_fp16")]; tensor var_4151_to_fp16 = const()[name = tensor("op_4151_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_715_cast_fp16, y = var_4151_to_fp16)[name = tensor("aw_chunk_715_cast_fp16")]; tensor var_4153_to_fp16 = const()[name = tensor("op_4153_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_717_cast_fp16, y = var_4153_to_fp16)[name = tensor("aw_chunk_717_cast_fp16")]; tensor var_4155_to_fp16 = const()[name = tensor("op_4155_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_719_cast_fp16, y = var_4155_to_fp16)[name = tensor("aw_chunk_719_cast_fp16")]; tensor var_4157_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_577_cast_fp16)[name = tensor("op_4157_cast_fp16")]; tensor var_4158_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_579_cast_fp16)[name = tensor("op_4158_cast_fp16")]; tensor var_4159_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_581_cast_fp16)[name = tensor("op_4159_cast_fp16")]; tensor var_4160_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_583_cast_fp16)[name = tensor("op_4160_cast_fp16")]; tensor var_4161_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_585_cast_fp16)[name = tensor("op_4161_cast_fp16")]; tensor var_4162_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_587_cast_fp16)[name = tensor("op_4162_cast_fp16")]; tensor var_4163_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_589_cast_fp16)[name = tensor("op_4163_cast_fp16")]; tensor var_4164_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_591_cast_fp16)[name = tensor("op_4164_cast_fp16")]; tensor var_4165_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_593_cast_fp16)[name = tensor("op_4165_cast_fp16")]; tensor var_4166_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_595_cast_fp16)[name = tensor("op_4166_cast_fp16")]; tensor var_4167_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_597_cast_fp16)[name = tensor("op_4167_cast_fp16")]; tensor var_4168_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_599_cast_fp16)[name = tensor("op_4168_cast_fp16")]; tensor var_4169_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_601_cast_fp16)[name = tensor("op_4169_cast_fp16")]; tensor var_4170_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_603_cast_fp16)[name = tensor("op_4170_cast_fp16")]; tensor var_4171_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_605_cast_fp16)[name = tensor("op_4171_cast_fp16")]; tensor var_4172_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_607_cast_fp16)[name = tensor("op_4172_cast_fp16")]; tensor var_4173_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_609_cast_fp16)[name = tensor("op_4173_cast_fp16")]; tensor var_4174_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_611_cast_fp16)[name = tensor("op_4174_cast_fp16")]; tensor var_4175_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_613_cast_fp16)[name = tensor("op_4175_cast_fp16")]; tensor var_4176_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_615_cast_fp16)[name = tensor("op_4176_cast_fp16")]; tensor var_4177_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_617_cast_fp16)[name = tensor("op_4177_cast_fp16")]; tensor var_4178_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_619_cast_fp16)[name = tensor("op_4178_cast_fp16")]; tensor var_4179_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_621_cast_fp16)[name = tensor("op_4179_cast_fp16")]; tensor var_4180_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_623_cast_fp16)[name = tensor("op_4180_cast_fp16")]; tensor var_4181_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_625_cast_fp16)[name = tensor("op_4181_cast_fp16")]; tensor var_4182_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_627_cast_fp16)[name = tensor("op_4182_cast_fp16")]; tensor var_4183_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_629_cast_fp16)[name = tensor("op_4183_cast_fp16")]; tensor var_4184_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_631_cast_fp16)[name = tensor("op_4184_cast_fp16")]; tensor var_4185_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_633_cast_fp16)[name = tensor("op_4185_cast_fp16")]; tensor var_4186_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_635_cast_fp16)[name = tensor("op_4186_cast_fp16")]; tensor var_4187_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_637_cast_fp16)[name = tensor("op_4187_cast_fp16")]; tensor var_4188_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_639_cast_fp16)[name = tensor("op_4188_cast_fp16")]; tensor var_4189_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_641_cast_fp16)[name = tensor("op_4189_cast_fp16")]; tensor var_4190_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_643_cast_fp16)[name = tensor("op_4190_cast_fp16")]; tensor var_4191_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_645_cast_fp16)[name = tensor("op_4191_cast_fp16")]; tensor var_4192_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_647_cast_fp16)[name = tensor("op_4192_cast_fp16")]; tensor var_4193_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_649_cast_fp16)[name = tensor("op_4193_cast_fp16")]; tensor var_4194_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_651_cast_fp16)[name = tensor("op_4194_cast_fp16")]; tensor var_4195_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_653_cast_fp16)[name = tensor("op_4195_cast_fp16")]; tensor var_4196_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_655_cast_fp16)[name = tensor("op_4196_cast_fp16")]; tensor var_4197_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_657_cast_fp16)[name = tensor("op_4197_cast_fp16")]; tensor var_4198_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_659_cast_fp16)[name = tensor("op_4198_cast_fp16")]; tensor var_4199_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_661_cast_fp16)[name = tensor("op_4199_cast_fp16")]; tensor var_4200_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_663_cast_fp16)[name = tensor("op_4200_cast_fp16")]; tensor var_4201_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_665_cast_fp16)[name = tensor("op_4201_cast_fp16")]; tensor var_4202_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_667_cast_fp16)[name = tensor("op_4202_cast_fp16")]; tensor var_4203_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_669_cast_fp16)[name = tensor("op_4203_cast_fp16")]; tensor var_4204_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_671_cast_fp16)[name = tensor("op_4204_cast_fp16")]; tensor var_4205_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_673_cast_fp16)[name = tensor("op_4205_cast_fp16")]; tensor var_4206_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_675_cast_fp16)[name = tensor("op_4206_cast_fp16")]; tensor var_4207_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_677_cast_fp16)[name = tensor("op_4207_cast_fp16")]; tensor var_4208_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_679_cast_fp16)[name = tensor("op_4208_cast_fp16")]; tensor var_4209_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_681_cast_fp16)[name = tensor("op_4209_cast_fp16")]; tensor var_4210_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_683_cast_fp16)[name = tensor("op_4210_cast_fp16")]; tensor var_4211_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_685_cast_fp16)[name = tensor("op_4211_cast_fp16")]; tensor var_4212_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_687_cast_fp16)[name = tensor("op_4212_cast_fp16")]; tensor var_4213_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_689_cast_fp16)[name = tensor("op_4213_cast_fp16")]; tensor var_4214_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_691_cast_fp16)[name = tensor("op_4214_cast_fp16")]; tensor var_4215_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_693_cast_fp16)[name = tensor("op_4215_cast_fp16")]; tensor var_4216_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_695_cast_fp16)[name = tensor("op_4216_cast_fp16")]; tensor var_4217_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_697_cast_fp16)[name = tensor("op_4217_cast_fp16")]; tensor var_4218_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_699_cast_fp16)[name = tensor("op_4218_cast_fp16")]; tensor var_4219_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_701_cast_fp16)[name = tensor("op_4219_cast_fp16")]; tensor var_4220_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_703_cast_fp16)[name = tensor("op_4220_cast_fp16")]; tensor var_4221_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_705_cast_fp16)[name = tensor("op_4221_cast_fp16")]; tensor var_4222_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_707_cast_fp16)[name = tensor("op_4222_cast_fp16")]; tensor var_4223_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_709_cast_fp16)[name = tensor("op_4223_cast_fp16")]; tensor var_4224_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_711_cast_fp16)[name = tensor("op_4224_cast_fp16")]; tensor var_4225_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_713_cast_fp16)[name = tensor("op_4225_cast_fp16")]; tensor var_4226_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_715_cast_fp16)[name = tensor("op_4226_cast_fp16")]; tensor var_4227_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_717_cast_fp16)[name = tensor("op_4227_cast_fp16")]; tensor var_4228_cast_fp16 = softmax(axis = var_3601, x = aw_chunk_719_cast_fp16)[name = tensor("op_4228_cast_fp16")]; tensor var_4230_equation_0 = const()[name = tensor("op_4230_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4230_cast_fp16 = einsum(equation = var_4230_equation_0, values = (var_3822_cast_fp16, var_4157_cast_fp16))[name = tensor("op_4230_cast_fp16")]; tensor var_4232_equation_0 = const()[name = tensor("op_4232_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4232_cast_fp16 = einsum(equation = var_4232_equation_0, values = (var_3822_cast_fp16, var_4158_cast_fp16))[name = tensor("op_4232_cast_fp16")]; tensor var_4234_equation_0 = const()[name = tensor("op_4234_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4234_cast_fp16 = einsum(equation = var_4234_equation_0, values = (var_3822_cast_fp16, var_4159_cast_fp16))[name = tensor("op_4234_cast_fp16")]; tensor var_4236_equation_0 = const()[name = tensor("op_4236_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4236_cast_fp16 = einsum(equation = var_4236_equation_0, values = (var_3822_cast_fp16, var_4160_cast_fp16))[name = tensor("op_4236_cast_fp16")]; tensor var_4238_equation_0 = const()[name = tensor("op_4238_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4238_cast_fp16 = einsum(equation = var_4238_equation_0, values = (var_3822_cast_fp16, var_4161_cast_fp16))[name = tensor("op_4238_cast_fp16")]; tensor var_4240_equation_0 = const()[name = tensor("op_4240_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4240_cast_fp16 = einsum(equation = var_4240_equation_0, values = (var_3822_cast_fp16, var_4162_cast_fp16))[name = tensor("op_4240_cast_fp16")]; tensor var_4242_equation_0 = const()[name = tensor("op_4242_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4242_cast_fp16 = einsum(equation = var_4242_equation_0, values = (var_3826_cast_fp16, var_4163_cast_fp16))[name = tensor("op_4242_cast_fp16")]; tensor var_4244_equation_0 = const()[name = tensor("op_4244_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4244_cast_fp16 = einsum(equation = var_4244_equation_0, values = (var_3826_cast_fp16, var_4164_cast_fp16))[name = tensor("op_4244_cast_fp16")]; tensor var_4246_equation_0 = const()[name = tensor("op_4246_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4246_cast_fp16 = einsum(equation = var_4246_equation_0, values = (var_3826_cast_fp16, var_4165_cast_fp16))[name = tensor("op_4246_cast_fp16")]; tensor var_4248_equation_0 = const()[name = tensor("op_4248_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4248_cast_fp16 = einsum(equation = var_4248_equation_0, values = (var_3826_cast_fp16, var_4166_cast_fp16))[name = tensor("op_4248_cast_fp16")]; tensor var_4250_equation_0 = const()[name = tensor("op_4250_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4250_cast_fp16 = einsum(equation = var_4250_equation_0, values = (var_3826_cast_fp16, var_4167_cast_fp16))[name = tensor("op_4250_cast_fp16")]; tensor var_4252_equation_0 = const()[name = tensor("op_4252_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4252_cast_fp16 = einsum(equation = var_4252_equation_0, values = (var_3826_cast_fp16, var_4168_cast_fp16))[name = tensor("op_4252_cast_fp16")]; tensor var_4254_equation_0 = const()[name = tensor("op_4254_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4254_cast_fp16 = einsum(equation = var_4254_equation_0, values = (var_3830_cast_fp16, var_4169_cast_fp16))[name = tensor("op_4254_cast_fp16")]; tensor var_4256_equation_0 = const()[name = tensor("op_4256_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4256_cast_fp16 = einsum(equation = var_4256_equation_0, values = (var_3830_cast_fp16, var_4170_cast_fp16))[name = tensor("op_4256_cast_fp16")]; tensor var_4258_equation_0 = const()[name = tensor("op_4258_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4258_cast_fp16 = einsum(equation = var_4258_equation_0, values = (var_3830_cast_fp16, var_4171_cast_fp16))[name = tensor("op_4258_cast_fp16")]; tensor var_4260_equation_0 = const()[name = tensor("op_4260_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4260_cast_fp16 = einsum(equation = var_4260_equation_0, values = (var_3830_cast_fp16, var_4172_cast_fp16))[name = tensor("op_4260_cast_fp16")]; tensor var_4262_equation_0 = const()[name = tensor("op_4262_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4262_cast_fp16 = einsum(equation = var_4262_equation_0, values = (var_3830_cast_fp16, var_4173_cast_fp16))[name = tensor("op_4262_cast_fp16")]; tensor var_4264_equation_0 = const()[name = tensor("op_4264_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4264_cast_fp16 = einsum(equation = var_4264_equation_0, values = (var_3830_cast_fp16, var_4174_cast_fp16))[name = tensor("op_4264_cast_fp16")]; tensor var_4266_equation_0 = const()[name = tensor("op_4266_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4266_cast_fp16 = einsum(equation = var_4266_equation_0, values = (var_3834_cast_fp16, var_4175_cast_fp16))[name = tensor("op_4266_cast_fp16")]; tensor var_4268_equation_0 = const()[name = tensor("op_4268_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4268_cast_fp16 = einsum(equation = var_4268_equation_0, values = (var_3834_cast_fp16, var_4176_cast_fp16))[name = tensor("op_4268_cast_fp16")]; tensor var_4270_equation_0 = const()[name = tensor("op_4270_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4270_cast_fp16 = einsum(equation = var_4270_equation_0, values = (var_3834_cast_fp16, var_4177_cast_fp16))[name = tensor("op_4270_cast_fp16")]; tensor var_4272_equation_0 = const()[name = tensor("op_4272_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4272_cast_fp16 = einsum(equation = var_4272_equation_0, values = (var_3834_cast_fp16, var_4178_cast_fp16))[name = tensor("op_4272_cast_fp16")]; tensor var_4274_equation_0 = const()[name = tensor("op_4274_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4274_cast_fp16 = einsum(equation = var_4274_equation_0, values = (var_3834_cast_fp16, var_4179_cast_fp16))[name = tensor("op_4274_cast_fp16")]; tensor var_4276_equation_0 = const()[name = tensor("op_4276_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4276_cast_fp16 = einsum(equation = var_4276_equation_0, values = (var_3834_cast_fp16, var_4180_cast_fp16))[name = tensor("op_4276_cast_fp16")]; tensor var_4278_equation_0 = const()[name = tensor("op_4278_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4278_cast_fp16 = einsum(equation = var_4278_equation_0, values = (var_3838_cast_fp16, var_4181_cast_fp16))[name = tensor("op_4278_cast_fp16")]; tensor var_4280_equation_0 = const()[name = tensor("op_4280_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4280_cast_fp16 = einsum(equation = var_4280_equation_0, values = (var_3838_cast_fp16, var_4182_cast_fp16))[name = tensor("op_4280_cast_fp16")]; tensor var_4282_equation_0 = const()[name = tensor("op_4282_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4282_cast_fp16 = einsum(equation = var_4282_equation_0, values = (var_3838_cast_fp16, var_4183_cast_fp16))[name = tensor("op_4282_cast_fp16")]; tensor var_4284_equation_0 = const()[name = tensor("op_4284_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4284_cast_fp16 = einsum(equation = var_4284_equation_0, values = (var_3838_cast_fp16, var_4184_cast_fp16))[name = tensor("op_4284_cast_fp16")]; tensor var_4286_equation_0 = const()[name = tensor("op_4286_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4286_cast_fp16 = einsum(equation = var_4286_equation_0, values = (var_3838_cast_fp16, var_4185_cast_fp16))[name = tensor("op_4286_cast_fp16")]; tensor var_4288_equation_0 = const()[name = tensor("op_4288_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4288_cast_fp16 = einsum(equation = var_4288_equation_0, values = (var_3838_cast_fp16, var_4186_cast_fp16))[name = tensor("op_4288_cast_fp16")]; tensor var_4290_equation_0 = const()[name = tensor("op_4290_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4290_cast_fp16 = einsum(equation = var_4290_equation_0, values = (var_3842_cast_fp16, var_4187_cast_fp16))[name = tensor("op_4290_cast_fp16")]; tensor var_4292_equation_0 = const()[name = tensor("op_4292_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4292_cast_fp16 = einsum(equation = var_4292_equation_0, values = (var_3842_cast_fp16, var_4188_cast_fp16))[name = tensor("op_4292_cast_fp16")]; tensor var_4294_equation_0 = const()[name = tensor("op_4294_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4294_cast_fp16 = einsum(equation = var_4294_equation_0, values = (var_3842_cast_fp16, var_4189_cast_fp16))[name = tensor("op_4294_cast_fp16")]; tensor var_4296_equation_0 = const()[name = tensor("op_4296_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4296_cast_fp16 = einsum(equation = var_4296_equation_0, values = (var_3842_cast_fp16, var_4190_cast_fp16))[name = tensor("op_4296_cast_fp16")]; tensor var_4298_equation_0 = const()[name = tensor("op_4298_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4298_cast_fp16 = einsum(equation = var_4298_equation_0, values = (var_3842_cast_fp16, var_4191_cast_fp16))[name = tensor("op_4298_cast_fp16")]; tensor var_4300_equation_0 = const()[name = tensor("op_4300_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4300_cast_fp16 = einsum(equation = var_4300_equation_0, values = (var_3842_cast_fp16, var_4192_cast_fp16))[name = tensor("op_4300_cast_fp16")]; tensor var_4302_equation_0 = const()[name = tensor("op_4302_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4302_cast_fp16 = einsum(equation = var_4302_equation_0, values = (var_3846_cast_fp16, var_4193_cast_fp16))[name = tensor("op_4302_cast_fp16")]; tensor var_4304_equation_0 = const()[name = tensor("op_4304_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4304_cast_fp16 = einsum(equation = var_4304_equation_0, values = (var_3846_cast_fp16, var_4194_cast_fp16))[name = tensor("op_4304_cast_fp16")]; tensor var_4306_equation_0 = const()[name = tensor("op_4306_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4306_cast_fp16 = einsum(equation = var_4306_equation_0, values = (var_3846_cast_fp16, var_4195_cast_fp16))[name = tensor("op_4306_cast_fp16")]; tensor var_4308_equation_0 = const()[name = tensor("op_4308_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4308_cast_fp16 = einsum(equation = var_4308_equation_0, values = (var_3846_cast_fp16, var_4196_cast_fp16))[name = tensor("op_4308_cast_fp16")]; tensor var_4310_equation_0 = const()[name = tensor("op_4310_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4310_cast_fp16 = einsum(equation = var_4310_equation_0, values = (var_3846_cast_fp16, var_4197_cast_fp16))[name = tensor("op_4310_cast_fp16")]; tensor var_4312_equation_0 = const()[name = tensor("op_4312_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4312_cast_fp16 = einsum(equation = var_4312_equation_0, values = (var_3846_cast_fp16, var_4198_cast_fp16))[name = tensor("op_4312_cast_fp16")]; tensor var_4314_equation_0 = const()[name = tensor("op_4314_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4314_cast_fp16 = einsum(equation = var_4314_equation_0, values = (var_3850_cast_fp16, var_4199_cast_fp16))[name = tensor("op_4314_cast_fp16")]; tensor var_4316_equation_0 = const()[name = tensor("op_4316_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4316_cast_fp16 = einsum(equation = var_4316_equation_0, values = (var_3850_cast_fp16, var_4200_cast_fp16))[name = tensor("op_4316_cast_fp16")]; tensor var_4318_equation_0 = const()[name = tensor("op_4318_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4318_cast_fp16 = einsum(equation = var_4318_equation_0, values = (var_3850_cast_fp16, var_4201_cast_fp16))[name = tensor("op_4318_cast_fp16")]; tensor var_4320_equation_0 = const()[name = tensor("op_4320_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4320_cast_fp16 = einsum(equation = var_4320_equation_0, values = (var_3850_cast_fp16, var_4202_cast_fp16))[name = tensor("op_4320_cast_fp16")]; tensor var_4322_equation_0 = const()[name = tensor("op_4322_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4322_cast_fp16 = einsum(equation = var_4322_equation_0, values = (var_3850_cast_fp16, var_4203_cast_fp16))[name = tensor("op_4322_cast_fp16")]; tensor var_4324_equation_0 = const()[name = tensor("op_4324_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4324_cast_fp16 = einsum(equation = var_4324_equation_0, values = (var_3850_cast_fp16, var_4204_cast_fp16))[name = tensor("op_4324_cast_fp16")]; tensor var_4326_equation_0 = const()[name = tensor("op_4326_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4326_cast_fp16 = einsum(equation = var_4326_equation_0, values = (var_3854_cast_fp16, var_4205_cast_fp16))[name = tensor("op_4326_cast_fp16")]; tensor var_4328_equation_0 = const()[name = tensor("op_4328_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4328_cast_fp16 = einsum(equation = var_4328_equation_0, values = (var_3854_cast_fp16, var_4206_cast_fp16))[name = tensor("op_4328_cast_fp16")]; tensor var_4330_equation_0 = const()[name = tensor("op_4330_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4330_cast_fp16 = einsum(equation = var_4330_equation_0, values = (var_3854_cast_fp16, var_4207_cast_fp16))[name = tensor("op_4330_cast_fp16")]; tensor var_4332_equation_0 = const()[name = tensor("op_4332_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4332_cast_fp16 = einsum(equation = var_4332_equation_0, values = (var_3854_cast_fp16, var_4208_cast_fp16))[name = tensor("op_4332_cast_fp16")]; tensor var_4334_equation_0 = const()[name = tensor("op_4334_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4334_cast_fp16 = einsum(equation = var_4334_equation_0, values = (var_3854_cast_fp16, var_4209_cast_fp16))[name = tensor("op_4334_cast_fp16")]; tensor var_4336_equation_0 = const()[name = tensor("op_4336_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4336_cast_fp16 = einsum(equation = var_4336_equation_0, values = (var_3854_cast_fp16, var_4210_cast_fp16))[name = tensor("op_4336_cast_fp16")]; tensor var_4338_equation_0 = const()[name = tensor("op_4338_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4338_cast_fp16 = einsum(equation = var_4338_equation_0, values = (var_3858_cast_fp16, var_4211_cast_fp16))[name = tensor("op_4338_cast_fp16")]; tensor var_4340_equation_0 = const()[name = tensor("op_4340_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4340_cast_fp16 = einsum(equation = var_4340_equation_0, values = (var_3858_cast_fp16, var_4212_cast_fp16))[name = tensor("op_4340_cast_fp16")]; tensor var_4342_equation_0 = const()[name = tensor("op_4342_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4342_cast_fp16 = einsum(equation = var_4342_equation_0, values = (var_3858_cast_fp16, var_4213_cast_fp16))[name = tensor("op_4342_cast_fp16")]; tensor var_4344_equation_0 = const()[name = tensor("op_4344_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4344_cast_fp16 = einsum(equation = var_4344_equation_0, values = (var_3858_cast_fp16, var_4214_cast_fp16))[name = tensor("op_4344_cast_fp16")]; tensor var_4346_equation_0 = const()[name = tensor("op_4346_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4346_cast_fp16 = einsum(equation = var_4346_equation_0, values = (var_3858_cast_fp16, var_4215_cast_fp16))[name = tensor("op_4346_cast_fp16")]; tensor var_4348_equation_0 = const()[name = tensor("op_4348_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4348_cast_fp16 = einsum(equation = var_4348_equation_0, values = (var_3858_cast_fp16, var_4216_cast_fp16))[name = tensor("op_4348_cast_fp16")]; tensor var_4350_equation_0 = const()[name = tensor("op_4350_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4350_cast_fp16 = einsum(equation = var_4350_equation_0, values = (var_3862_cast_fp16, var_4217_cast_fp16))[name = tensor("op_4350_cast_fp16")]; tensor var_4352_equation_0 = const()[name = tensor("op_4352_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4352_cast_fp16 = einsum(equation = var_4352_equation_0, values = (var_3862_cast_fp16, var_4218_cast_fp16))[name = tensor("op_4352_cast_fp16")]; tensor var_4354_equation_0 = const()[name = tensor("op_4354_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4354_cast_fp16 = einsum(equation = var_4354_equation_0, values = (var_3862_cast_fp16, var_4219_cast_fp16))[name = tensor("op_4354_cast_fp16")]; tensor var_4356_equation_0 = const()[name = tensor("op_4356_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4356_cast_fp16 = einsum(equation = var_4356_equation_0, values = (var_3862_cast_fp16, var_4220_cast_fp16))[name = tensor("op_4356_cast_fp16")]; tensor var_4358_equation_0 = const()[name = tensor("op_4358_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4358_cast_fp16 = einsum(equation = var_4358_equation_0, values = (var_3862_cast_fp16, var_4221_cast_fp16))[name = tensor("op_4358_cast_fp16")]; tensor var_4360_equation_0 = const()[name = tensor("op_4360_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4360_cast_fp16 = einsum(equation = var_4360_equation_0, values = (var_3862_cast_fp16, var_4222_cast_fp16))[name = tensor("op_4360_cast_fp16")]; tensor var_4362_equation_0 = const()[name = tensor("op_4362_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4362_cast_fp16 = einsum(equation = var_4362_equation_0, values = (var_3866_cast_fp16, var_4223_cast_fp16))[name = tensor("op_4362_cast_fp16")]; tensor var_4364_equation_0 = const()[name = tensor("op_4364_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4364_cast_fp16 = einsum(equation = var_4364_equation_0, values = (var_3866_cast_fp16, var_4224_cast_fp16))[name = tensor("op_4364_cast_fp16")]; tensor var_4366_equation_0 = const()[name = tensor("op_4366_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4366_cast_fp16 = einsum(equation = var_4366_equation_0, values = (var_3866_cast_fp16, var_4225_cast_fp16))[name = tensor("op_4366_cast_fp16")]; tensor var_4368_equation_0 = const()[name = tensor("op_4368_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4368_cast_fp16 = einsum(equation = var_4368_equation_0, values = (var_3866_cast_fp16, var_4226_cast_fp16))[name = tensor("op_4368_cast_fp16")]; tensor var_4370_equation_0 = const()[name = tensor("op_4370_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4370_cast_fp16 = einsum(equation = var_4370_equation_0, values = (var_3866_cast_fp16, var_4227_cast_fp16))[name = tensor("op_4370_cast_fp16")]; tensor var_4372_equation_0 = const()[name = tensor("op_4372_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4372_cast_fp16 = einsum(equation = var_4372_equation_0, values = (var_3866_cast_fp16, var_4228_cast_fp16))[name = tensor("op_4372_cast_fp16")]; tensor var_4374_interleave_0 = const()[name = tensor("op_4374_interleave_0"), val = tensor(false)]; tensor var_4374_cast_fp16 = concat(axis = var_3585, interleave = var_4374_interleave_0, values = (var_4230_cast_fp16, var_4232_cast_fp16, var_4234_cast_fp16, var_4236_cast_fp16, var_4238_cast_fp16, var_4240_cast_fp16))[name = tensor("op_4374_cast_fp16")]; tensor var_4376_interleave_0 = const()[name = tensor("op_4376_interleave_0"), val = tensor(false)]; tensor var_4376_cast_fp16 = concat(axis = var_3585, interleave = var_4376_interleave_0, values = (var_4242_cast_fp16, var_4244_cast_fp16, var_4246_cast_fp16, var_4248_cast_fp16, var_4250_cast_fp16, var_4252_cast_fp16))[name = tensor("op_4376_cast_fp16")]; tensor var_4378_interleave_0 = const()[name = tensor("op_4378_interleave_0"), val = tensor(false)]; tensor var_4378_cast_fp16 = concat(axis = var_3585, interleave = var_4378_interleave_0, values = (var_4254_cast_fp16, var_4256_cast_fp16, var_4258_cast_fp16, var_4260_cast_fp16, var_4262_cast_fp16, var_4264_cast_fp16))[name = tensor("op_4378_cast_fp16")]; tensor var_4380_interleave_0 = const()[name = tensor("op_4380_interleave_0"), val = tensor(false)]; tensor var_4380_cast_fp16 = concat(axis = var_3585, interleave = var_4380_interleave_0, values = (var_4266_cast_fp16, var_4268_cast_fp16, var_4270_cast_fp16, var_4272_cast_fp16, var_4274_cast_fp16, var_4276_cast_fp16))[name = tensor("op_4380_cast_fp16")]; tensor var_4382_interleave_0 = const()[name = tensor("op_4382_interleave_0"), val = tensor(false)]; tensor var_4382_cast_fp16 = concat(axis = var_3585, interleave = var_4382_interleave_0, values = (var_4278_cast_fp16, var_4280_cast_fp16, var_4282_cast_fp16, var_4284_cast_fp16, var_4286_cast_fp16, var_4288_cast_fp16))[name = tensor("op_4382_cast_fp16")]; tensor var_4384_interleave_0 = const()[name = tensor("op_4384_interleave_0"), val = tensor(false)]; tensor var_4384_cast_fp16 = concat(axis = var_3585, interleave = var_4384_interleave_0, values = (var_4290_cast_fp16, var_4292_cast_fp16, var_4294_cast_fp16, var_4296_cast_fp16, var_4298_cast_fp16, var_4300_cast_fp16))[name = tensor("op_4384_cast_fp16")]; tensor var_4386_interleave_0 = const()[name = tensor("op_4386_interleave_0"), val = tensor(false)]; tensor var_4386_cast_fp16 = concat(axis = var_3585, interleave = var_4386_interleave_0, values = (var_4302_cast_fp16, var_4304_cast_fp16, var_4306_cast_fp16, var_4308_cast_fp16, var_4310_cast_fp16, var_4312_cast_fp16))[name = tensor("op_4386_cast_fp16")]; tensor var_4388_interleave_0 = const()[name = tensor("op_4388_interleave_0"), val = tensor(false)]; tensor var_4388_cast_fp16 = concat(axis = var_3585, interleave = var_4388_interleave_0, values = (var_4314_cast_fp16, var_4316_cast_fp16, var_4318_cast_fp16, var_4320_cast_fp16, var_4322_cast_fp16, var_4324_cast_fp16))[name = tensor("op_4388_cast_fp16")]; tensor var_4390_interleave_0 = const()[name = tensor("op_4390_interleave_0"), val = tensor(false)]; tensor var_4390_cast_fp16 = concat(axis = var_3585, interleave = var_4390_interleave_0, values = (var_4326_cast_fp16, var_4328_cast_fp16, var_4330_cast_fp16, var_4332_cast_fp16, var_4334_cast_fp16, var_4336_cast_fp16))[name = tensor("op_4390_cast_fp16")]; tensor var_4392_interleave_0 = const()[name = tensor("op_4392_interleave_0"), val = tensor(false)]; tensor var_4392_cast_fp16 = concat(axis = var_3585, interleave = var_4392_interleave_0, values = (var_4338_cast_fp16, var_4340_cast_fp16, var_4342_cast_fp16, var_4344_cast_fp16, var_4346_cast_fp16, var_4348_cast_fp16))[name = tensor("op_4392_cast_fp16")]; tensor var_4394_interleave_0 = const()[name = tensor("op_4394_interleave_0"), val = tensor(false)]; tensor var_4394_cast_fp16 = concat(axis = var_3585, interleave = var_4394_interleave_0, values = (var_4350_cast_fp16, var_4352_cast_fp16, var_4354_cast_fp16, var_4356_cast_fp16, var_4358_cast_fp16, var_4360_cast_fp16))[name = tensor("op_4394_cast_fp16")]; tensor var_4396_interleave_0 = const()[name = tensor("op_4396_interleave_0"), val = tensor(false)]; tensor var_4396_cast_fp16 = concat(axis = var_3585, interleave = var_4396_interleave_0, values = (var_4362_cast_fp16, var_4364_cast_fp16, var_4366_cast_fp16, var_4368_cast_fp16, var_4370_cast_fp16, var_4372_cast_fp16))[name = tensor("op_4396_cast_fp16")]; tensor input_33_interleave_0 = const()[name = tensor("input_33_interleave_0"), val = tensor(false)]; tensor input_33_cast_fp16 = concat(axis = var_3601, interleave = input_33_interleave_0, values = (var_4374_cast_fp16, var_4376_cast_fp16, var_4378_cast_fp16, var_4380_cast_fp16, var_4382_cast_fp16, var_4384_cast_fp16, var_4386_cast_fp16, var_4388_cast_fp16, var_4390_cast_fp16, var_4392_cast_fp16, var_4394_cast_fp16, var_4396_cast_fp16))[name = tensor("input_33_cast_fp16")]; tensor obj_19_pad_type_0 = const()[name = tensor("obj_19_pad_type_0"), val = tensor("valid")]; tensor obj_19_strides_0 = const()[name = tensor("obj_19_strides_0"), val = tensor([1, 1])]; tensor obj_19_pad_0 = const()[name = tensor("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_19_dilations_0 = const()[name = tensor("obj_19_dilations_0"), val = tensor([1, 1])]; tensor obj_19_groups_0 = const()[name = tensor("obj_19_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66464448)))]; tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67644160)))]; tensor obj_19_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = obj_19_dilations_0, groups = obj_19_groups_0, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = obj_19_strides_0, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_19_cast_fp16")]; tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; tensor var_4415_to_fp16 = const()[name = tensor("op_4415_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_4415_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67645760)))]; tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67647360)))]; tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("valid")]; tensor input_37_strides_0 = const()[name = tensor("input_37_strides_0"), val = tensor([1, 1])]; tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_37_dilations_0 = const()[name = tensor("input_37_dilations_0"), val = tensor([1, 1])]; tensor input_37_groups_0 = const()[name = tensor("input_37_groups_0"), val = tensor(1)]; tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67648960)))]; tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72367616)))]; tensor input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("valid")]; tensor hidden_states_13_strides_0 = const()[name = tensor("hidden_states_13_strides_0"), val = tensor([1, 1])]; tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_13_dilations_0 = const()[name = tensor("hidden_states_13_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_13_groups_0 = const()[name = tensor("hidden_states_13_groups_0"), val = tensor(1)]; tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72373824)))]; tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77092480)))]; tensor hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = hidden_states_13_dilations_0, groups = hidden_states_13_groups_0, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = hidden_states_13_strides_0, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; tensor var_4447 = const()[name = tensor("op_4447"), val = tensor(3)]; tensor var_4463 = const()[name = tensor("op_4463"), val = tensor(1)]; tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; tensor var_4480_to_fp16 = const()[name = tensor("op_4480_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_4480_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77094080)))]; tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77095680)))]; tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("valid")]; tensor query_11_strides_0 = const()[name = tensor("query_11_strides_0"), val = tensor([1, 1])]; tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_11_dilations_0 = const()[name = tensor("query_11_dilations_0"), val = tensor([1, 1])]; tensor query_11_groups_0 = const()[name = tensor("query_11_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77097280)))]; tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78276992)))]; tensor query_11_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = query_11_dilations_0, groups = query_11_groups_0, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = query_11_strides_0, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("query_11_cast_fp16")]; tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("valid")]; tensor key_11_strides_0 = const()[name = tensor("key_11_strides_0"), val = tensor([1, 1])]; tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_11_dilations_0 = const()[name = tensor("key_11_dilations_0"), val = tensor([1, 1])]; tensor key_11_groups_0 = const()[name = tensor("key_11_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78278592)))]; tensor key_11_cast_fp16 = conv(dilations = key_11_dilations_0, groups = key_11_groups_0, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = key_11_strides_0, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("key_11_cast_fp16")]; tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("valid")]; tensor value_11_strides_0 = const()[name = tensor("value_11_strides_0"), val = tensor([1, 1])]; tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_11_dilations_0 = const()[name = tensor("value_11_dilations_0"), val = tensor([1, 1])]; tensor value_11_groups_0 = const()[name = tensor("value_11_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79458304)))]; tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80638016)))]; tensor value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = value_11_dilations_0, groups = value_11_groups_0, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = value_11_strides_0, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("value_11_cast_fp16")]; tensor var_4515_begin_0 = const()[name = tensor("op_4515_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4515_end_0 = const()[name = tensor("op_4515_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4515_end_mask_0 = const()[name = tensor("op_4515_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4515_cast_fp16 = slice_by_index(begin = var_4515_begin_0, end = var_4515_end_0, end_mask = var_4515_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4515_cast_fp16")]; tensor var_4519_begin_0 = const()[name = tensor("op_4519_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_4519_end_0 = const()[name = tensor("op_4519_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_4519_end_mask_0 = const()[name = tensor("op_4519_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4519_cast_fp16 = slice_by_index(begin = var_4519_begin_0, end = var_4519_end_0, end_mask = var_4519_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4519_cast_fp16")]; tensor var_4523_begin_0 = const()[name = tensor("op_4523_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_4523_end_0 = const()[name = tensor("op_4523_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_4523_end_mask_0 = const()[name = tensor("op_4523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4523_cast_fp16 = slice_by_index(begin = var_4523_begin_0, end = var_4523_end_0, end_mask = var_4523_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4523_cast_fp16")]; tensor var_4527_begin_0 = const()[name = tensor("op_4527_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_4527_end_0 = const()[name = tensor("op_4527_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_4527_end_mask_0 = const()[name = tensor("op_4527_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4527_cast_fp16 = slice_by_index(begin = var_4527_begin_0, end = var_4527_end_0, end_mask = var_4527_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4527_cast_fp16")]; tensor var_4531_begin_0 = const()[name = tensor("op_4531_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_4531_end_0 = const()[name = tensor("op_4531_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_4531_end_mask_0 = const()[name = tensor("op_4531_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4531_cast_fp16 = slice_by_index(begin = var_4531_begin_0, end = var_4531_end_0, end_mask = var_4531_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4531_cast_fp16")]; tensor var_4535_begin_0 = const()[name = tensor("op_4535_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4535_end_0 = const()[name = tensor("op_4535_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_4535_end_mask_0 = const()[name = tensor("op_4535_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4535_cast_fp16 = slice_by_index(begin = var_4535_begin_0, end = var_4535_end_0, end_mask = var_4535_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4535_cast_fp16")]; tensor var_4539_begin_0 = const()[name = tensor("op_4539_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_4539_end_0 = const()[name = tensor("op_4539_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_4539_end_mask_0 = const()[name = tensor("op_4539_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4539_cast_fp16 = slice_by_index(begin = var_4539_begin_0, end = var_4539_end_0, end_mask = var_4539_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4539_cast_fp16")]; tensor var_4543_begin_0 = const()[name = tensor("op_4543_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_4543_end_0 = const()[name = tensor("op_4543_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_4543_end_mask_0 = const()[name = tensor("op_4543_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4543_cast_fp16 = slice_by_index(begin = var_4543_begin_0, end = var_4543_end_0, end_mask = var_4543_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4543_cast_fp16")]; tensor var_4547_begin_0 = const()[name = tensor("op_4547_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_4547_end_0 = const()[name = tensor("op_4547_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_4547_end_mask_0 = const()[name = tensor("op_4547_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4547_cast_fp16 = slice_by_index(begin = var_4547_begin_0, end = var_4547_end_0, end_mask = var_4547_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4547_cast_fp16")]; tensor var_4551_begin_0 = const()[name = tensor("op_4551_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_4551_end_0 = const()[name = tensor("op_4551_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_4551_end_mask_0 = const()[name = tensor("op_4551_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4551_cast_fp16 = slice_by_index(begin = var_4551_begin_0, end = var_4551_end_0, end_mask = var_4551_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4551_cast_fp16")]; tensor var_4555_begin_0 = const()[name = tensor("op_4555_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4555_end_0 = const()[name = tensor("op_4555_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_4555_end_mask_0 = const()[name = tensor("op_4555_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4555_cast_fp16 = slice_by_index(begin = var_4555_begin_0, end = var_4555_end_0, end_mask = var_4555_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4555_cast_fp16")]; tensor var_4559_begin_0 = const()[name = tensor("op_4559_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_4559_end_0 = const()[name = tensor("op_4559_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_4559_end_mask_0 = const()[name = tensor("op_4559_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4559_cast_fp16 = slice_by_index(begin = var_4559_begin_0, end = var_4559_end_0, end_mask = var_4559_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_4559_cast_fp16")]; tensor var_4562_begin_0 = const()[name = tensor("op_4562_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4562_end_0 = const()[name = tensor("op_4562_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4562_end_mask_0 = const()[name = tensor("op_4562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4562_cast_fp16 = slice_by_index(begin = var_4562_begin_0, end = var_4562_end_0, end_mask = var_4562_end_mask_0, x = var_4515_cast_fp16)[name = tensor("op_4562_cast_fp16")]; tensor var_4563_begin_0 = const()[name = tensor("op_4563_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4563_end_0 = const()[name = tensor("op_4563_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4563_end_mask_0 = const()[name = tensor("op_4563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4563_cast_fp16 = slice_by_index(begin = var_4563_begin_0, end = var_4563_end_0, end_mask = var_4563_end_mask_0, x = var_4515_cast_fp16)[name = tensor("op_4563_cast_fp16")]; tensor var_4564_begin_0 = const()[name = tensor("op_4564_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4564_end_0 = const()[name = tensor("op_4564_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4564_end_mask_0 = const()[name = tensor("op_4564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4564_cast_fp16 = slice_by_index(begin = var_4564_begin_0, end = var_4564_end_0, end_mask = var_4564_end_mask_0, x = var_4515_cast_fp16)[name = tensor("op_4564_cast_fp16")]; tensor var_4565_begin_0 = const()[name = tensor("op_4565_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4565_end_0 = const()[name = tensor("op_4565_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4565_end_mask_0 = const()[name = tensor("op_4565_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4565_cast_fp16 = slice_by_index(begin = var_4565_begin_0, end = var_4565_end_0, end_mask = var_4565_end_mask_0, x = var_4515_cast_fp16)[name = tensor("op_4565_cast_fp16")]; tensor var_4566_begin_0 = const()[name = tensor("op_4566_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4566_end_0 = const()[name = tensor("op_4566_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4566_end_mask_0 = const()[name = tensor("op_4566_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4566_cast_fp16 = slice_by_index(begin = var_4566_begin_0, end = var_4566_end_0, end_mask = var_4566_end_mask_0, x = var_4515_cast_fp16)[name = tensor("op_4566_cast_fp16")]; tensor var_4567_begin_0 = const()[name = tensor("op_4567_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4567_end_0 = const()[name = tensor("op_4567_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4567_end_mask_0 = const()[name = tensor("op_4567_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4567_cast_fp16 = slice_by_index(begin = var_4567_begin_0, end = var_4567_end_0, end_mask = var_4567_end_mask_0, x = var_4515_cast_fp16)[name = tensor("op_4567_cast_fp16")]; tensor var_4568_begin_0 = const()[name = tensor("op_4568_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4568_end_0 = const()[name = tensor("op_4568_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4568_end_mask_0 = const()[name = tensor("op_4568_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4568_cast_fp16 = slice_by_index(begin = var_4568_begin_0, end = var_4568_end_0, end_mask = var_4568_end_mask_0, x = var_4519_cast_fp16)[name = tensor("op_4568_cast_fp16")]; tensor var_4569_begin_0 = const()[name = tensor("op_4569_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4569_end_0 = const()[name = tensor("op_4569_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4569_end_mask_0 = const()[name = tensor("op_4569_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4569_cast_fp16 = slice_by_index(begin = var_4569_begin_0, end = var_4569_end_0, end_mask = var_4569_end_mask_0, x = var_4519_cast_fp16)[name = tensor("op_4569_cast_fp16")]; tensor var_4570_begin_0 = const()[name = tensor("op_4570_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4570_end_0 = const()[name = tensor("op_4570_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4570_end_mask_0 = const()[name = tensor("op_4570_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4570_cast_fp16 = slice_by_index(begin = var_4570_begin_0, end = var_4570_end_0, end_mask = var_4570_end_mask_0, x = var_4519_cast_fp16)[name = tensor("op_4570_cast_fp16")]; tensor var_4571_begin_0 = const()[name = tensor("op_4571_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4571_end_0 = const()[name = tensor("op_4571_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4571_end_mask_0 = const()[name = tensor("op_4571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4571_cast_fp16 = slice_by_index(begin = var_4571_begin_0, end = var_4571_end_0, end_mask = var_4571_end_mask_0, x = var_4519_cast_fp16)[name = tensor("op_4571_cast_fp16")]; tensor var_4572_begin_0 = const()[name = tensor("op_4572_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4572_end_0 = const()[name = tensor("op_4572_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4572_end_mask_0 = const()[name = tensor("op_4572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4572_cast_fp16 = slice_by_index(begin = var_4572_begin_0, end = var_4572_end_0, end_mask = var_4572_end_mask_0, x = var_4519_cast_fp16)[name = tensor("op_4572_cast_fp16")]; tensor var_4573_begin_0 = const()[name = tensor("op_4573_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4573_end_0 = const()[name = tensor("op_4573_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4573_end_mask_0 = const()[name = tensor("op_4573_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4573_cast_fp16 = slice_by_index(begin = var_4573_begin_0, end = var_4573_end_0, end_mask = var_4573_end_mask_0, x = var_4519_cast_fp16)[name = tensor("op_4573_cast_fp16")]; tensor var_4574_begin_0 = const()[name = tensor("op_4574_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4574_end_0 = const()[name = tensor("op_4574_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4574_end_mask_0 = const()[name = tensor("op_4574_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4574_cast_fp16 = slice_by_index(begin = var_4574_begin_0, end = var_4574_end_0, end_mask = var_4574_end_mask_0, x = var_4523_cast_fp16)[name = tensor("op_4574_cast_fp16")]; tensor var_4575_begin_0 = const()[name = tensor("op_4575_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4575_end_0 = const()[name = tensor("op_4575_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4575_end_mask_0 = const()[name = tensor("op_4575_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4575_cast_fp16 = slice_by_index(begin = var_4575_begin_0, end = var_4575_end_0, end_mask = var_4575_end_mask_0, x = var_4523_cast_fp16)[name = tensor("op_4575_cast_fp16")]; tensor var_4576_begin_0 = const()[name = tensor("op_4576_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4576_end_0 = const()[name = tensor("op_4576_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4576_end_mask_0 = const()[name = tensor("op_4576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4576_cast_fp16 = slice_by_index(begin = var_4576_begin_0, end = var_4576_end_0, end_mask = var_4576_end_mask_0, x = var_4523_cast_fp16)[name = tensor("op_4576_cast_fp16")]; tensor var_4577_begin_0 = const()[name = tensor("op_4577_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4577_end_0 = const()[name = tensor("op_4577_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4577_end_mask_0 = const()[name = tensor("op_4577_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4577_cast_fp16 = slice_by_index(begin = var_4577_begin_0, end = var_4577_end_0, end_mask = var_4577_end_mask_0, x = var_4523_cast_fp16)[name = tensor("op_4577_cast_fp16")]; tensor var_4578_begin_0 = const()[name = tensor("op_4578_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4578_end_0 = const()[name = tensor("op_4578_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4578_end_mask_0 = const()[name = tensor("op_4578_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4578_cast_fp16 = slice_by_index(begin = var_4578_begin_0, end = var_4578_end_0, end_mask = var_4578_end_mask_0, x = var_4523_cast_fp16)[name = tensor("op_4578_cast_fp16")]; tensor var_4579_begin_0 = const()[name = tensor("op_4579_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4579_end_0 = const()[name = tensor("op_4579_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4579_end_mask_0 = const()[name = tensor("op_4579_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4579_cast_fp16 = slice_by_index(begin = var_4579_begin_0, end = var_4579_end_0, end_mask = var_4579_end_mask_0, x = var_4523_cast_fp16)[name = tensor("op_4579_cast_fp16")]; tensor var_4580_begin_0 = const()[name = tensor("op_4580_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4580_end_0 = const()[name = tensor("op_4580_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4580_end_mask_0 = const()[name = tensor("op_4580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4580_cast_fp16 = slice_by_index(begin = var_4580_begin_0, end = var_4580_end_0, end_mask = var_4580_end_mask_0, x = var_4527_cast_fp16)[name = tensor("op_4580_cast_fp16")]; tensor var_4581_begin_0 = const()[name = tensor("op_4581_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4581_end_0 = const()[name = tensor("op_4581_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4581_end_mask_0 = const()[name = tensor("op_4581_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4581_cast_fp16 = slice_by_index(begin = var_4581_begin_0, end = var_4581_end_0, end_mask = var_4581_end_mask_0, x = var_4527_cast_fp16)[name = tensor("op_4581_cast_fp16")]; tensor var_4582_begin_0 = const()[name = tensor("op_4582_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4582_end_0 = const()[name = tensor("op_4582_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4582_end_mask_0 = const()[name = tensor("op_4582_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4582_cast_fp16 = slice_by_index(begin = var_4582_begin_0, end = var_4582_end_0, end_mask = var_4582_end_mask_0, x = var_4527_cast_fp16)[name = tensor("op_4582_cast_fp16")]; tensor var_4583_begin_0 = const()[name = tensor("op_4583_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4583_end_0 = const()[name = tensor("op_4583_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4583_end_mask_0 = const()[name = tensor("op_4583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4583_cast_fp16 = slice_by_index(begin = var_4583_begin_0, end = var_4583_end_0, end_mask = var_4583_end_mask_0, x = var_4527_cast_fp16)[name = tensor("op_4583_cast_fp16")]; tensor var_4584_begin_0 = const()[name = tensor("op_4584_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4584_end_0 = const()[name = tensor("op_4584_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4584_end_mask_0 = const()[name = tensor("op_4584_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4584_cast_fp16 = slice_by_index(begin = var_4584_begin_0, end = var_4584_end_0, end_mask = var_4584_end_mask_0, x = var_4527_cast_fp16)[name = tensor("op_4584_cast_fp16")]; tensor var_4585_begin_0 = const()[name = tensor("op_4585_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4585_end_0 = const()[name = tensor("op_4585_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4585_end_mask_0 = const()[name = tensor("op_4585_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4585_cast_fp16 = slice_by_index(begin = var_4585_begin_0, end = var_4585_end_0, end_mask = var_4585_end_mask_0, x = var_4527_cast_fp16)[name = tensor("op_4585_cast_fp16")]; tensor var_4586_begin_0 = const()[name = tensor("op_4586_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4586_end_0 = const()[name = tensor("op_4586_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4586_end_mask_0 = const()[name = tensor("op_4586_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4586_cast_fp16 = slice_by_index(begin = var_4586_begin_0, end = var_4586_end_0, end_mask = var_4586_end_mask_0, x = var_4531_cast_fp16)[name = tensor("op_4586_cast_fp16")]; tensor var_4587_begin_0 = const()[name = tensor("op_4587_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4587_end_0 = const()[name = tensor("op_4587_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4587_end_mask_0 = const()[name = tensor("op_4587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4587_cast_fp16 = slice_by_index(begin = var_4587_begin_0, end = var_4587_end_0, end_mask = var_4587_end_mask_0, x = var_4531_cast_fp16)[name = tensor("op_4587_cast_fp16")]; tensor var_4588_begin_0 = const()[name = tensor("op_4588_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4588_end_0 = const()[name = tensor("op_4588_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4588_end_mask_0 = const()[name = tensor("op_4588_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4588_cast_fp16 = slice_by_index(begin = var_4588_begin_0, end = var_4588_end_0, end_mask = var_4588_end_mask_0, x = var_4531_cast_fp16)[name = tensor("op_4588_cast_fp16")]; tensor var_4589_begin_0 = const()[name = tensor("op_4589_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4589_end_0 = const()[name = tensor("op_4589_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4589_end_mask_0 = const()[name = tensor("op_4589_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4589_cast_fp16 = slice_by_index(begin = var_4589_begin_0, end = var_4589_end_0, end_mask = var_4589_end_mask_0, x = var_4531_cast_fp16)[name = tensor("op_4589_cast_fp16")]; tensor var_4590_begin_0 = const()[name = tensor("op_4590_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4590_end_0 = const()[name = tensor("op_4590_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4590_end_mask_0 = const()[name = tensor("op_4590_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4590_cast_fp16 = slice_by_index(begin = var_4590_begin_0, end = var_4590_end_0, end_mask = var_4590_end_mask_0, x = var_4531_cast_fp16)[name = tensor("op_4590_cast_fp16")]; tensor var_4591_begin_0 = const()[name = tensor("op_4591_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4591_end_0 = const()[name = tensor("op_4591_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4591_end_mask_0 = const()[name = tensor("op_4591_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4591_cast_fp16 = slice_by_index(begin = var_4591_begin_0, end = var_4591_end_0, end_mask = var_4591_end_mask_0, x = var_4531_cast_fp16)[name = tensor("op_4591_cast_fp16")]; tensor var_4592_begin_0 = const()[name = tensor("op_4592_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4592_end_0 = const()[name = tensor("op_4592_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4592_end_mask_0 = const()[name = tensor("op_4592_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4592_cast_fp16 = slice_by_index(begin = var_4592_begin_0, end = var_4592_end_0, end_mask = var_4592_end_mask_0, x = var_4535_cast_fp16)[name = tensor("op_4592_cast_fp16")]; tensor var_4593_begin_0 = const()[name = tensor("op_4593_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4593_end_0 = const()[name = tensor("op_4593_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4593_end_mask_0 = const()[name = tensor("op_4593_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4593_cast_fp16 = slice_by_index(begin = var_4593_begin_0, end = var_4593_end_0, end_mask = var_4593_end_mask_0, x = var_4535_cast_fp16)[name = tensor("op_4593_cast_fp16")]; tensor var_4594_begin_0 = const()[name = tensor("op_4594_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4594_end_0 = const()[name = tensor("op_4594_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4594_end_mask_0 = const()[name = tensor("op_4594_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4594_cast_fp16 = slice_by_index(begin = var_4594_begin_0, end = var_4594_end_0, end_mask = var_4594_end_mask_0, x = var_4535_cast_fp16)[name = tensor("op_4594_cast_fp16")]; tensor var_4595_begin_0 = const()[name = tensor("op_4595_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4595_end_0 = const()[name = tensor("op_4595_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4595_end_mask_0 = const()[name = tensor("op_4595_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4595_cast_fp16 = slice_by_index(begin = var_4595_begin_0, end = var_4595_end_0, end_mask = var_4595_end_mask_0, x = var_4535_cast_fp16)[name = tensor("op_4595_cast_fp16")]; tensor var_4596_begin_0 = const()[name = tensor("op_4596_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4596_end_0 = const()[name = tensor("op_4596_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4596_end_mask_0 = const()[name = tensor("op_4596_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4596_cast_fp16 = slice_by_index(begin = var_4596_begin_0, end = var_4596_end_0, end_mask = var_4596_end_mask_0, x = var_4535_cast_fp16)[name = tensor("op_4596_cast_fp16")]; tensor var_4597_begin_0 = const()[name = tensor("op_4597_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4597_end_0 = const()[name = tensor("op_4597_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4597_end_mask_0 = const()[name = tensor("op_4597_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4597_cast_fp16 = slice_by_index(begin = var_4597_begin_0, end = var_4597_end_0, end_mask = var_4597_end_mask_0, x = var_4535_cast_fp16)[name = tensor("op_4597_cast_fp16")]; tensor var_4598_begin_0 = const()[name = tensor("op_4598_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4598_end_0 = const()[name = tensor("op_4598_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4598_end_mask_0 = const()[name = tensor("op_4598_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4598_cast_fp16 = slice_by_index(begin = var_4598_begin_0, end = var_4598_end_0, end_mask = var_4598_end_mask_0, x = var_4539_cast_fp16)[name = tensor("op_4598_cast_fp16")]; tensor var_4599_begin_0 = const()[name = tensor("op_4599_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4599_end_0 = const()[name = tensor("op_4599_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4599_end_mask_0 = const()[name = tensor("op_4599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4599_cast_fp16 = slice_by_index(begin = var_4599_begin_0, end = var_4599_end_0, end_mask = var_4599_end_mask_0, x = var_4539_cast_fp16)[name = tensor("op_4599_cast_fp16")]; tensor var_4600_begin_0 = const()[name = tensor("op_4600_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4600_end_0 = const()[name = tensor("op_4600_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4600_end_mask_0 = const()[name = tensor("op_4600_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4600_cast_fp16 = slice_by_index(begin = var_4600_begin_0, end = var_4600_end_0, end_mask = var_4600_end_mask_0, x = var_4539_cast_fp16)[name = tensor("op_4600_cast_fp16")]; tensor var_4601_begin_0 = const()[name = tensor("op_4601_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4601_end_0 = const()[name = tensor("op_4601_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4601_end_mask_0 = const()[name = tensor("op_4601_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4601_cast_fp16 = slice_by_index(begin = var_4601_begin_0, end = var_4601_end_0, end_mask = var_4601_end_mask_0, x = var_4539_cast_fp16)[name = tensor("op_4601_cast_fp16")]; tensor var_4602_begin_0 = const()[name = tensor("op_4602_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4602_end_0 = const()[name = tensor("op_4602_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4602_end_mask_0 = const()[name = tensor("op_4602_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4602_cast_fp16 = slice_by_index(begin = var_4602_begin_0, end = var_4602_end_0, end_mask = var_4602_end_mask_0, x = var_4539_cast_fp16)[name = tensor("op_4602_cast_fp16")]; tensor var_4603_begin_0 = const()[name = tensor("op_4603_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4603_end_0 = const()[name = tensor("op_4603_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4603_end_mask_0 = const()[name = tensor("op_4603_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4603_cast_fp16 = slice_by_index(begin = var_4603_begin_0, end = var_4603_end_0, end_mask = var_4603_end_mask_0, x = var_4539_cast_fp16)[name = tensor("op_4603_cast_fp16")]; tensor var_4604_begin_0 = const()[name = tensor("op_4604_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4604_end_0 = const()[name = tensor("op_4604_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4604_end_mask_0 = const()[name = tensor("op_4604_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4604_cast_fp16 = slice_by_index(begin = var_4604_begin_0, end = var_4604_end_0, end_mask = var_4604_end_mask_0, x = var_4543_cast_fp16)[name = tensor("op_4604_cast_fp16")]; tensor var_4605_begin_0 = const()[name = tensor("op_4605_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4605_end_0 = const()[name = tensor("op_4605_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4605_end_mask_0 = const()[name = tensor("op_4605_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4605_cast_fp16 = slice_by_index(begin = var_4605_begin_0, end = var_4605_end_0, end_mask = var_4605_end_mask_0, x = var_4543_cast_fp16)[name = tensor("op_4605_cast_fp16")]; tensor var_4606_begin_0 = const()[name = tensor("op_4606_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4606_end_0 = const()[name = tensor("op_4606_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4606_end_mask_0 = const()[name = tensor("op_4606_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4606_cast_fp16 = slice_by_index(begin = var_4606_begin_0, end = var_4606_end_0, end_mask = var_4606_end_mask_0, x = var_4543_cast_fp16)[name = tensor("op_4606_cast_fp16")]; tensor var_4607_begin_0 = const()[name = tensor("op_4607_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4607_end_0 = const()[name = tensor("op_4607_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4607_end_mask_0 = const()[name = tensor("op_4607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4607_cast_fp16 = slice_by_index(begin = var_4607_begin_0, end = var_4607_end_0, end_mask = var_4607_end_mask_0, x = var_4543_cast_fp16)[name = tensor("op_4607_cast_fp16")]; tensor var_4608_begin_0 = const()[name = tensor("op_4608_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4608_end_0 = const()[name = tensor("op_4608_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4608_end_mask_0 = const()[name = tensor("op_4608_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4608_cast_fp16 = slice_by_index(begin = var_4608_begin_0, end = var_4608_end_0, end_mask = var_4608_end_mask_0, x = var_4543_cast_fp16)[name = tensor("op_4608_cast_fp16")]; tensor var_4609_begin_0 = const()[name = tensor("op_4609_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4609_end_0 = const()[name = tensor("op_4609_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4609_end_mask_0 = const()[name = tensor("op_4609_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4609_cast_fp16 = slice_by_index(begin = var_4609_begin_0, end = var_4609_end_0, end_mask = var_4609_end_mask_0, x = var_4543_cast_fp16)[name = tensor("op_4609_cast_fp16")]; tensor var_4610_begin_0 = const()[name = tensor("op_4610_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4610_end_0 = const()[name = tensor("op_4610_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4610_end_mask_0 = const()[name = tensor("op_4610_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4610_cast_fp16 = slice_by_index(begin = var_4610_begin_0, end = var_4610_end_0, end_mask = var_4610_end_mask_0, x = var_4547_cast_fp16)[name = tensor("op_4610_cast_fp16")]; tensor var_4611_begin_0 = const()[name = tensor("op_4611_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4611_end_0 = const()[name = tensor("op_4611_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4611_end_mask_0 = const()[name = tensor("op_4611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4611_cast_fp16 = slice_by_index(begin = var_4611_begin_0, end = var_4611_end_0, end_mask = var_4611_end_mask_0, x = var_4547_cast_fp16)[name = tensor("op_4611_cast_fp16")]; tensor var_4612_begin_0 = const()[name = tensor("op_4612_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4612_end_0 = const()[name = tensor("op_4612_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4612_end_mask_0 = const()[name = tensor("op_4612_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4612_cast_fp16 = slice_by_index(begin = var_4612_begin_0, end = var_4612_end_0, end_mask = var_4612_end_mask_0, x = var_4547_cast_fp16)[name = tensor("op_4612_cast_fp16")]; tensor var_4613_begin_0 = const()[name = tensor("op_4613_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4613_end_0 = const()[name = tensor("op_4613_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4613_end_mask_0 = const()[name = tensor("op_4613_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4613_cast_fp16 = slice_by_index(begin = var_4613_begin_0, end = var_4613_end_0, end_mask = var_4613_end_mask_0, x = var_4547_cast_fp16)[name = tensor("op_4613_cast_fp16")]; tensor var_4614_begin_0 = const()[name = tensor("op_4614_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4614_end_0 = const()[name = tensor("op_4614_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4614_end_mask_0 = const()[name = tensor("op_4614_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4614_cast_fp16 = slice_by_index(begin = var_4614_begin_0, end = var_4614_end_0, end_mask = var_4614_end_mask_0, x = var_4547_cast_fp16)[name = tensor("op_4614_cast_fp16")]; tensor var_4615_begin_0 = const()[name = tensor("op_4615_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4615_end_0 = const()[name = tensor("op_4615_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4615_end_mask_0 = const()[name = tensor("op_4615_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4615_cast_fp16 = slice_by_index(begin = var_4615_begin_0, end = var_4615_end_0, end_mask = var_4615_end_mask_0, x = var_4547_cast_fp16)[name = tensor("op_4615_cast_fp16")]; tensor var_4616_begin_0 = const()[name = tensor("op_4616_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4616_end_0 = const()[name = tensor("op_4616_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4616_end_mask_0 = const()[name = tensor("op_4616_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4616_cast_fp16 = slice_by_index(begin = var_4616_begin_0, end = var_4616_end_0, end_mask = var_4616_end_mask_0, x = var_4551_cast_fp16)[name = tensor("op_4616_cast_fp16")]; tensor var_4617_begin_0 = const()[name = tensor("op_4617_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4617_end_0 = const()[name = tensor("op_4617_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4617_end_mask_0 = const()[name = tensor("op_4617_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4617_cast_fp16 = slice_by_index(begin = var_4617_begin_0, end = var_4617_end_0, end_mask = var_4617_end_mask_0, x = var_4551_cast_fp16)[name = tensor("op_4617_cast_fp16")]; tensor var_4618_begin_0 = const()[name = tensor("op_4618_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4618_end_0 = const()[name = tensor("op_4618_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4618_end_mask_0 = const()[name = tensor("op_4618_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4618_cast_fp16 = slice_by_index(begin = var_4618_begin_0, end = var_4618_end_0, end_mask = var_4618_end_mask_0, x = var_4551_cast_fp16)[name = tensor("op_4618_cast_fp16")]; tensor var_4619_begin_0 = const()[name = tensor("op_4619_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4619_end_0 = const()[name = tensor("op_4619_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4619_end_mask_0 = const()[name = tensor("op_4619_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4619_cast_fp16 = slice_by_index(begin = var_4619_begin_0, end = var_4619_end_0, end_mask = var_4619_end_mask_0, x = var_4551_cast_fp16)[name = tensor("op_4619_cast_fp16")]; tensor var_4620_begin_0 = const()[name = tensor("op_4620_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4620_end_0 = const()[name = tensor("op_4620_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4620_end_mask_0 = const()[name = tensor("op_4620_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4620_cast_fp16 = slice_by_index(begin = var_4620_begin_0, end = var_4620_end_0, end_mask = var_4620_end_mask_0, x = var_4551_cast_fp16)[name = tensor("op_4620_cast_fp16")]; tensor var_4621_begin_0 = const()[name = tensor("op_4621_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4621_end_0 = const()[name = tensor("op_4621_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4621_end_mask_0 = const()[name = tensor("op_4621_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4621_cast_fp16 = slice_by_index(begin = var_4621_begin_0, end = var_4621_end_0, end_mask = var_4621_end_mask_0, x = var_4551_cast_fp16)[name = tensor("op_4621_cast_fp16")]; tensor var_4622_begin_0 = const()[name = tensor("op_4622_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4622_end_0 = const()[name = tensor("op_4622_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4622_end_mask_0 = const()[name = tensor("op_4622_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4622_cast_fp16 = slice_by_index(begin = var_4622_begin_0, end = var_4622_end_0, end_mask = var_4622_end_mask_0, x = var_4555_cast_fp16)[name = tensor("op_4622_cast_fp16")]; tensor var_4623_begin_0 = const()[name = tensor("op_4623_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4623_end_0 = const()[name = tensor("op_4623_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4623_end_mask_0 = const()[name = tensor("op_4623_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4623_cast_fp16 = slice_by_index(begin = var_4623_begin_0, end = var_4623_end_0, end_mask = var_4623_end_mask_0, x = var_4555_cast_fp16)[name = tensor("op_4623_cast_fp16")]; tensor var_4624_begin_0 = const()[name = tensor("op_4624_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4624_end_0 = const()[name = tensor("op_4624_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4624_end_mask_0 = const()[name = tensor("op_4624_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4624_cast_fp16 = slice_by_index(begin = var_4624_begin_0, end = var_4624_end_0, end_mask = var_4624_end_mask_0, x = var_4555_cast_fp16)[name = tensor("op_4624_cast_fp16")]; tensor var_4625_begin_0 = const()[name = tensor("op_4625_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4625_end_0 = const()[name = tensor("op_4625_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4625_end_mask_0 = const()[name = tensor("op_4625_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4625_cast_fp16 = slice_by_index(begin = var_4625_begin_0, end = var_4625_end_0, end_mask = var_4625_end_mask_0, x = var_4555_cast_fp16)[name = tensor("op_4625_cast_fp16")]; tensor var_4626_begin_0 = const()[name = tensor("op_4626_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4626_end_0 = const()[name = tensor("op_4626_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4626_end_mask_0 = const()[name = tensor("op_4626_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4626_cast_fp16 = slice_by_index(begin = var_4626_begin_0, end = var_4626_end_0, end_mask = var_4626_end_mask_0, x = var_4555_cast_fp16)[name = tensor("op_4626_cast_fp16")]; tensor var_4627_begin_0 = const()[name = tensor("op_4627_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4627_end_0 = const()[name = tensor("op_4627_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4627_end_mask_0 = const()[name = tensor("op_4627_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4627_cast_fp16 = slice_by_index(begin = var_4627_begin_0, end = var_4627_end_0, end_mask = var_4627_end_mask_0, x = var_4555_cast_fp16)[name = tensor("op_4627_cast_fp16")]; tensor var_4628_begin_0 = const()[name = tensor("op_4628_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4628_end_0 = const()[name = tensor("op_4628_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_4628_end_mask_0 = const()[name = tensor("op_4628_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4628_cast_fp16 = slice_by_index(begin = var_4628_begin_0, end = var_4628_end_0, end_mask = var_4628_end_mask_0, x = var_4559_cast_fp16)[name = tensor("op_4628_cast_fp16")]; tensor var_4629_begin_0 = const()[name = tensor("op_4629_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4629_end_0 = const()[name = tensor("op_4629_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_4629_end_mask_0 = const()[name = tensor("op_4629_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4629_cast_fp16 = slice_by_index(begin = var_4629_begin_0, end = var_4629_end_0, end_mask = var_4629_end_mask_0, x = var_4559_cast_fp16)[name = tensor("op_4629_cast_fp16")]; tensor var_4630_begin_0 = const()[name = tensor("op_4630_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4630_end_0 = const()[name = tensor("op_4630_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_4630_end_mask_0 = const()[name = tensor("op_4630_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4630_cast_fp16 = slice_by_index(begin = var_4630_begin_0, end = var_4630_end_0, end_mask = var_4630_end_mask_0, x = var_4559_cast_fp16)[name = tensor("op_4630_cast_fp16")]; tensor var_4631_begin_0 = const()[name = tensor("op_4631_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4631_end_0 = const()[name = tensor("op_4631_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_4631_end_mask_0 = const()[name = tensor("op_4631_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4631_cast_fp16 = slice_by_index(begin = var_4631_begin_0, end = var_4631_end_0, end_mask = var_4631_end_mask_0, x = var_4559_cast_fp16)[name = tensor("op_4631_cast_fp16")]; tensor var_4632_begin_0 = const()[name = tensor("op_4632_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4632_end_0 = const()[name = tensor("op_4632_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_4632_end_mask_0 = const()[name = tensor("op_4632_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4632_cast_fp16 = slice_by_index(begin = var_4632_begin_0, end = var_4632_end_0, end_mask = var_4632_end_mask_0, x = var_4559_cast_fp16)[name = tensor("op_4632_cast_fp16")]; tensor var_4633_begin_0 = const()[name = tensor("op_4633_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_4633_end_0 = const()[name = tensor("op_4633_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_4633_end_mask_0 = const()[name = tensor("op_4633_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4633_cast_fp16 = slice_by_index(begin = var_4633_begin_0, end = var_4633_end_0, end_mask = var_4633_end_mask_0, x = var_4559_cast_fp16)[name = tensor("op_4633_cast_fp16")]; tensor k_11_perm_0 = const()[name = tensor("k_11_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_4638_begin_0 = const()[name = tensor("op_4638_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4638_end_0 = const()[name = tensor("op_4638_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_4638_end_mask_0 = const()[name = tensor("op_4638_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = key_11_cast_fp16)[name = tensor("transpose_6")]; tensor var_4638_cast_fp16 = slice_by_index(begin = var_4638_begin_0, end = var_4638_end_0, end_mask = var_4638_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4638_cast_fp16")]; tensor var_4642_begin_0 = const()[name = tensor("op_4642_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_4642_end_0 = const()[name = tensor("op_4642_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_4642_end_mask_0 = const()[name = tensor("op_4642_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4642_cast_fp16 = slice_by_index(begin = var_4642_begin_0, end = var_4642_end_0, end_mask = var_4642_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4642_cast_fp16")]; tensor var_4646_begin_0 = const()[name = tensor("op_4646_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_4646_end_0 = const()[name = tensor("op_4646_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_4646_end_mask_0 = const()[name = tensor("op_4646_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4646_cast_fp16 = slice_by_index(begin = var_4646_begin_0, end = var_4646_end_0, end_mask = var_4646_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4646_cast_fp16")]; tensor var_4650_begin_0 = const()[name = tensor("op_4650_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_4650_end_0 = const()[name = tensor("op_4650_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_4650_end_mask_0 = const()[name = tensor("op_4650_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4650_cast_fp16 = slice_by_index(begin = var_4650_begin_0, end = var_4650_end_0, end_mask = var_4650_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4650_cast_fp16")]; tensor var_4654_begin_0 = const()[name = tensor("op_4654_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4654_end_0 = const()[name = tensor("op_4654_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_4654_end_mask_0 = const()[name = tensor("op_4654_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4654_cast_fp16 = slice_by_index(begin = var_4654_begin_0, end = var_4654_end_0, end_mask = var_4654_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4654_cast_fp16")]; tensor var_4658_begin_0 = const()[name = tensor("op_4658_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_4658_end_0 = const()[name = tensor("op_4658_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_4658_end_mask_0 = const()[name = tensor("op_4658_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4658_cast_fp16 = slice_by_index(begin = var_4658_begin_0, end = var_4658_end_0, end_mask = var_4658_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4658_cast_fp16")]; tensor var_4662_begin_0 = const()[name = tensor("op_4662_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_4662_end_0 = const()[name = tensor("op_4662_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_4662_end_mask_0 = const()[name = tensor("op_4662_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4662_cast_fp16 = slice_by_index(begin = var_4662_begin_0, end = var_4662_end_0, end_mask = var_4662_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4662_cast_fp16")]; tensor var_4666_begin_0 = const()[name = tensor("op_4666_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_4666_end_0 = const()[name = tensor("op_4666_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_4666_end_mask_0 = const()[name = tensor("op_4666_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4666_cast_fp16 = slice_by_index(begin = var_4666_begin_0, end = var_4666_end_0, end_mask = var_4666_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4666_cast_fp16")]; tensor var_4670_begin_0 = const()[name = tensor("op_4670_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4670_end_0 = const()[name = tensor("op_4670_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_4670_end_mask_0 = const()[name = tensor("op_4670_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4670_cast_fp16 = slice_by_index(begin = var_4670_begin_0, end = var_4670_end_0, end_mask = var_4670_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4670_cast_fp16")]; tensor var_4674_begin_0 = const()[name = tensor("op_4674_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_4674_end_0 = const()[name = tensor("op_4674_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_4674_end_mask_0 = const()[name = tensor("op_4674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4674_cast_fp16 = slice_by_index(begin = var_4674_begin_0, end = var_4674_end_0, end_mask = var_4674_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4674_cast_fp16")]; tensor var_4678_begin_0 = const()[name = tensor("op_4678_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_4678_end_0 = const()[name = tensor("op_4678_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_4678_end_mask_0 = const()[name = tensor("op_4678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4678_cast_fp16 = slice_by_index(begin = var_4678_begin_0, end = var_4678_end_0, end_mask = var_4678_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4678_cast_fp16")]; tensor var_4682_begin_0 = const()[name = tensor("op_4682_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_4682_end_0 = const()[name = tensor("op_4682_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_4682_end_mask_0 = const()[name = tensor("op_4682_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4682_cast_fp16 = slice_by_index(begin = var_4682_begin_0, end = var_4682_end_0, end_mask = var_4682_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_4682_cast_fp16")]; tensor var_4684_begin_0 = const()[name = tensor("op_4684_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4684_end_0 = const()[name = tensor("op_4684_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4684_end_mask_0 = const()[name = tensor("op_4684_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4684_cast_fp16 = slice_by_index(begin = var_4684_begin_0, end = var_4684_end_0, end_mask = var_4684_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4684_cast_fp16")]; tensor var_4688_begin_0 = const()[name = tensor("op_4688_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_4688_end_0 = const()[name = tensor("op_4688_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_4688_end_mask_0 = const()[name = tensor("op_4688_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4688_cast_fp16 = slice_by_index(begin = var_4688_begin_0, end = var_4688_end_0, end_mask = var_4688_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4688_cast_fp16")]; tensor var_4692_begin_0 = const()[name = tensor("op_4692_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_4692_end_0 = const()[name = tensor("op_4692_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_4692_end_mask_0 = const()[name = tensor("op_4692_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4692_cast_fp16 = slice_by_index(begin = var_4692_begin_0, end = var_4692_end_0, end_mask = var_4692_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4692_cast_fp16")]; tensor var_4696_begin_0 = const()[name = tensor("op_4696_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_4696_end_0 = const()[name = tensor("op_4696_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_4696_end_mask_0 = const()[name = tensor("op_4696_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4696_cast_fp16 = slice_by_index(begin = var_4696_begin_0, end = var_4696_end_0, end_mask = var_4696_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4696_cast_fp16")]; tensor var_4700_begin_0 = const()[name = tensor("op_4700_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_4700_end_0 = const()[name = tensor("op_4700_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_4700_end_mask_0 = const()[name = tensor("op_4700_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4700_cast_fp16 = slice_by_index(begin = var_4700_begin_0, end = var_4700_end_0, end_mask = var_4700_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4700_cast_fp16")]; tensor var_4704_begin_0 = const()[name = tensor("op_4704_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4704_end_0 = const()[name = tensor("op_4704_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_4704_end_mask_0 = const()[name = tensor("op_4704_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4704_cast_fp16 = slice_by_index(begin = var_4704_begin_0, end = var_4704_end_0, end_mask = var_4704_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4704_cast_fp16")]; tensor var_4708_begin_0 = const()[name = tensor("op_4708_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_4708_end_0 = const()[name = tensor("op_4708_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_4708_end_mask_0 = const()[name = tensor("op_4708_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4708_cast_fp16 = slice_by_index(begin = var_4708_begin_0, end = var_4708_end_0, end_mask = var_4708_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4708_cast_fp16")]; tensor var_4712_begin_0 = const()[name = tensor("op_4712_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_4712_end_0 = const()[name = tensor("op_4712_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_4712_end_mask_0 = const()[name = tensor("op_4712_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4712_cast_fp16 = slice_by_index(begin = var_4712_begin_0, end = var_4712_end_0, end_mask = var_4712_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4712_cast_fp16")]; tensor var_4716_begin_0 = const()[name = tensor("op_4716_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_4716_end_0 = const()[name = tensor("op_4716_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_4716_end_mask_0 = const()[name = tensor("op_4716_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4716_cast_fp16 = slice_by_index(begin = var_4716_begin_0, end = var_4716_end_0, end_mask = var_4716_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4716_cast_fp16")]; tensor var_4720_begin_0 = const()[name = tensor("op_4720_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_4720_end_0 = const()[name = tensor("op_4720_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_4720_end_mask_0 = const()[name = tensor("op_4720_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4720_cast_fp16 = slice_by_index(begin = var_4720_begin_0, end = var_4720_end_0, end_mask = var_4720_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4720_cast_fp16")]; tensor var_4724_begin_0 = const()[name = tensor("op_4724_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4724_end_0 = const()[name = tensor("op_4724_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_4724_end_mask_0 = const()[name = tensor("op_4724_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4724_cast_fp16 = slice_by_index(begin = var_4724_begin_0, end = var_4724_end_0, end_mask = var_4724_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4724_cast_fp16")]; tensor var_4728_begin_0 = const()[name = tensor("op_4728_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_4728_end_0 = const()[name = tensor("op_4728_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_4728_end_mask_0 = const()[name = tensor("op_4728_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_4728_cast_fp16 = slice_by_index(begin = var_4728_begin_0, end = var_4728_end_0, end_mask = var_4728_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_4728_cast_fp16")]; tensor _SplitHeadsQ__mh_w_721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_721_equation_0, values = (var_4638_cast_fp16, var_4562_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_723_equation_0, values = (var_4638_cast_fp16, var_4563_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_725_equation_0, values = (var_4638_cast_fp16, var_4564_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_727_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_727_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_727_equation_0, values = (var_4638_cast_fp16, var_4565_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_727_cast_fp16")]; tensor _SplitHeadsQ__mh_w_729_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_729_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_729_equation_0, values = (var_4638_cast_fp16, var_4566_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_731_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_731_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_731_equation_0, values = (var_4638_cast_fp16, var_4567_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_731_cast_fp16")]; tensor _SplitHeadsQ__mh_w_733_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_733_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_733_equation_0, values = (var_4642_cast_fp16, var_4568_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_733_cast_fp16")]; tensor _SplitHeadsQ__mh_w_735_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_735_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_735_equation_0, values = (var_4642_cast_fp16, var_4569_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_735_cast_fp16")]; tensor _SplitHeadsQ__mh_w_737_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_737_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_737_equation_0, values = (var_4642_cast_fp16, var_4570_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_737_cast_fp16")]; tensor _SplitHeadsQ__mh_w_739_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_739_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_739_equation_0, values = (var_4642_cast_fp16, var_4571_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_739_cast_fp16")]; tensor _SplitHeadsQ__mh_w_741_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_741_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_741_equation_0, values = (var_4642_cast_fp16, var_4572_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_741_cast_fp16")]; tensor _SplitHeadsQ__mh_w_743_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_743_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_743_equation_0, values = (var_4642_cast_fp16, var_4573_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_743_cast_fp16")]; tensor _SplitHeadsQ__mh_w_745_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_745_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_745_equation_0, values = (var_4646_cast_fp16, var_4574_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_745_cast_fp16")]; tensor _SplitHeadsQ__mh_w_747_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_747_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_747_equation_0, values = (var_4646_cast_fp16, var_4575_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_747_cast_fp16")]; tensor _SplitHeadsQ__mh_w_749_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_749_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_749_equation_0, values = (var_4646_cast_fp16, var_4576_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_749_cast_fp16")]; tensor _SplitHeadsQ__mh_w_751_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_751_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_751_equation_0, values = (var_4646_cast_fp16, var_4577_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_751_cast_fp16")]; tensor _SplitHeadsQ__mh_w_753_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_753_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_753_equation_0, values = (var_4646_cast_fp16, var_4578_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_753_cast_fp16")]; tensor _SplitHeadsQ__mh_w_755_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_755_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_755_equation_0, values = (var_4646_cast_fp16, var_4579_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_755_cast_fp16")]; tensor _SplitHeadsQ__mh_w_757_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_757_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_757_equation_0, values = (var_4650_cast_fp16, var_4580_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_759_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_759_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_759_equation_0, values = (var_4650_cast_fp16, var_4581_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_759_cast_fp16")]; tensor _SplitHeadsQ__mh_w_761_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_761_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_761_equation_0, values = (var_4650_cast_fp16, var_4582_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_761_cast_fp16")]; tensor _SplitHeadsQ__mh_w_763_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_763_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_763_equation_0, values = (var_4650_cast_fp16, var_4583_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_763_cast_fp16")]; tensor _SplitHeadsQ__mh_w_765_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_765_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_765_equation_0, values = (var_4650_cast_fp16, var_4584_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_765_cast_fp16")]; tensor _SplitHeadsQ__mh_w_767_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_767_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_767_equation_0, values = (var_4650_cast_fp16, var_4585_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_767_cast_fp16")]; tensor _SplitHeadsQ__mh_w_769_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_769_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_769_equation_0, values = (var_4654_cast_fp16, var_4586_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_769_cast_fp16")]; tensor _SplitHeadsQ__mh_w_771_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_771_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_771_equation_0, values = (var_4654_cast_fp16, var_4587_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_771_cast_fp16")]; tensor _SplitHeadsQ__mh_w_773_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_773_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_773_equation_0, values = (var_4654_cast_fp16, var_4588_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_773_cast_fp16")]; tensor _SplitHeadsQ__mh_w_775_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_775_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_775_equation_0, values = (var_4654_cast_fp16, var_4589_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_777_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_777_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_777_equation_0, values = (var_4654_cast_fp16, var_4590_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_777_cast_fp16")]; tensor _SplitHeadsQ__mh_w_779_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_779_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_779_equation_0, values = (var_4654_cast_fp16, var_4591_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_779_cast_fp16")]; tensor _SplitHeadsQ__mh_w_781_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_781_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_781_equation_0, values = (var_4658_cast_fp16, var_4592_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_781_cast_fp16")]; tensor _SplitHeadsQ__mh_w_783_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_783_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_783_equation_0, values = (var_4658_cast_fp16, var_4593_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_783_cast_fp16")]; tensor _SplitHeadsQ__mh_w_785_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_785_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_785_equation_0, values = (var_4658_cast_fp16, var_4594_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_785_cast_fp16")]; tensor _SplitHeadsQ__mh_w_787_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_787_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_787_equation_0, values = (var_4658_cast_fp16, var_4595_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_787_cast_fp16")]; tensor _SplitHeadsQ__mh_w_789_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_789_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_789_equation_0, values = (var_4658_cast_fp16, var_4596_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_789_cast_fp16")]; tensor _SplitHeadsQ__mh_w_791_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_791_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_791_equation_0, values = (var_4658_cast_fp16, var_4597_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_791_cast_fp16")]; tensor _SplitHeadsQ__mh_w_793_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_793_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_793_equation_0, values = (var_4662_cast_fp16, var_4598_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_793_cast_fp16")]; tensor _SplitHeadsQ__mh_w_795_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_795_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_795_equation_0, values = (var_4662_cast_fp16, var_4599_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_795_cast_fp16")]; tensor _SplitHeadsQ__mh_w_797_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_797_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_797_equation_0, values = (var_4662_cast_fp16, var_4600_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_797_cast_fp16")]; tensor _SplitHeadsQ__mh_w_799_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_799_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_799_equation_0, values = (var_4662_cast_fp16, var_4601_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_799_cast_fp16")]; tensor _SplitHeadsQ__mh_w_801_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_801_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_801_equation_0, values = (var_4662_cast_fp16, var_4602_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_801_cast_fp16")]; tensor _SplitHeadsQ__mh_w_803_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_803_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_803_equation_0, values = (var_4662_cast_fp16, var_4603_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_803_cast_fp16")]; tensor _SplitHeadsQ__mh_w_805_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_805_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_805_equation_0, values = (var_4666_cast_fp16, var_4604_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_805_cast_fp16")]; tensor _SplitHeadsQ__mh_w_807_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_807_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_807_equation_0, values = (var_4666_cast_fp16, var_4605_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_807_cast_fp16")]; tensor _SplitHeadsQ__mh_w_809_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_809_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_809_equation_0, values = (var_4666_cast_fp16, var_4606_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_809_cast_fp16")]; tensor _SplitHeadsQ__mh_w_811_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_811_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_811_equation_0, values = (var_4666_cast_fp16, var_4607_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_811_cast_fp16")]; tensor _SplitHeadsQ__mh_w_813_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_813_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_813_equation_0, values = (var_4666_cast_fp16, var_4608_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_813_cast_fp16")]; tensor _SplitHeadsQ__mh_w_815_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_815_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_815_equation_0, values = (var_4666_cast_fp16, var_4609_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_815_cast_fp16")]; tensor _SplitHeadsQ__mh_w_817_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_817_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_817_equation_0, values = (var_4670_cast_fp16, var_4610_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_817_cast_fp16")]; tensor _SplitHeadsQ__mh_w_819_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_819_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_819_equation_0, values = (var_4670_cast_fp16, var_4611_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_819_cast_fp16")]; tensor _SplitHeadsQ__mh_w_821_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_821_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_821_equation_0, values = (var_4670_cast_fp16, var_4612_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_821_cast_fp16")]; tensor _SplitHeadsQ__mh_w_823_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_823_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_823_equation_0, values = (var_4670_cast_fp16, var_4613_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_823_cast_fp16")]; tensor _SplitHeadsQ__mh_w_825_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_825_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_825_equation_0, values = (var_4670_cast_fp16, var_4614_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_825_cast_fp16")]; tensor _SplitHeadsQ__mh_w_827_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_827_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_827_equation_0, values = (var_4670_cast_fp16, var_4615_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_827_cast_fp16")]; tensor _SplitHeadsQ__mh_w_829_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_829_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_829_equation_0, values = (var_4674_cast_fp16, var_4616_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_829_cast_fp16")]; tensor _SplitHeadsQ__mh_w_831_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_831_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_831_equation_0, values = (var_4674_cast_fp16, var_4617_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_831_cast_fp16")]; tensor _SplitHeadsQ__mh_w_833_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_833_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_833_equation_0, values = (var_4674_cast_fp16, var_4618_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_833_cast_fp16")]; tensor _SplitHeadsQ__mh_w_835_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_835_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_835_equation_0, values = (var_4674_cast_fp16, var_4619_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_835_cast_fp16")]; tensor _SplitHeadsQ__mh_w_837_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_837_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_837_equation_0, values = (var_4674_cast_fp16, var_4620_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_837_cast_fp16")]; tensor _SplitHeadsQ__mh_w_839_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_839_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_839_equation_0, values = (var_4674_cast_fp16, var_4621_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_839_cast_fp16")]; tensor _SplitHeadsQ__mh_w_841_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_841_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_841_equation_0, values = (var_4678_cast_fp16, var_4622_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_841_cast_fp16")]; tensor _SplitHeadsQ__mh_w_843_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_843_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_843_equation_0, values = (var_4678_cast_fp16, var_4623_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_843_cast_fp16")]; tensor _SplitHeadsQ__mh_w_845_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_845_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_845_equation_0, values = (var_4678_cast_fp16, var_4624_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_845_cast_fp16")]; tensor _SplitHeadsQ__mh_w_847_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_847_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_847_equation_0, values = (var_4678_cast_fp16, var_4625_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_847_cast_fp16")]; tensor _SplitHeadsQ__mh_w_849_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_849_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_849_equation_0, values = (var_4678_cast_fp16, var_4626_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_849_cast_fp16")]; tensor _SplitHeadsQ__mh_w_851_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_851_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_851_equation_0, values = (var_4678_cast_fp16, var_4627_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_851_cast_fp16")]; tensor _SplitHeadsQ__mh_w_853_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_853_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_853_equation_0, values = (var_4682_cast_fp16, var_4628_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_853_cast_fp16")]; tensor _SplitHeadsQ__mh_w_855_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_855_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_855_equation_0, values = (var_4682_cast_fp16, var_4629_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_855_cast_fp16")]; tensor _SplitHeadsQ__mh_w_857_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_857_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_857_equation_0, values = (var_4682_cast_fp16, var_4630_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_857_cast_fp16")]; tensor _SplitHeadsQ__mh_w_859_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_859_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_859_equation_0, values = (var_4682_cast_fp16, var_4631_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_859_cast_fp16")]; tensor _SplitHeadsQ__mh_w_861_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_861_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_861_equation_0, values = (var_4682_cast_fp16, var_4632_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_861_cast_fp16")]; tensor _SplitHeadsQ__mh_w_863_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_863_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_863_equation_0, values = (var_4682_cast_fp16, var_4633_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_863_cast_fp16")]; tensor var_4875_to_fp16 = const()[name = tensor("op_4875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_721_cast_fp16, y = var_4875_to_fp16)[name = tensor("aw_chunk_721_cast_fp16")]; tensor var_4877_to_fp16 = const()[name = tensor("op_4877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_723_cast_fp16, y = var_4877_to_fp16)[name = tensor("aw_chunk_723_cast_fp16")]; tensor var_4879_to_fp16 = const()[name = tensor("op_4879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_725_cast_fp16, y = var_4879_to_fp16)[name = tensor("aw_chunk_725_cast_fp16")]; tensor var_4881_to_fp16 = const()[name = tensor("op_4881_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_727_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_727_cast_fp16, y = var_4881_to_fp16)[name = tensor("aw_chunk_727_cast_fp16")]; tensor var_4883_to_fp16 = const()[name = tensor("op_4883_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_729_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_729_cast_fp16, y = var_4883_to_fp16)[name = tensor("aw_chunk_729_cast_fp16")]; tensor var_4885_to_fp16 = const()[name = tensor("op_4885_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_731_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_731_cast_fp16, y = var_4885_to_fp16)[name = tensor("aw_chunk_731_cast_fp16")]; tensor var_4887_to_fp16 = const()[name = tensor("op_4887_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_733_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_733_cast_fp16, y = var_4887_to_fp16)[name = tensor("aw_chunk_733_cast_fp16")]; tensor var_4889_to_fp16 = const()[name = tensor("op_4889_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_735_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_735_cast_fp16, y = var_4889_to_fp16)[name = tensor("aw_chunk_735_cast_fp16")]; tensor var_4891_to_fp16 = const()[name = tensor("op_4891_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_737_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_737_cast_fp16, y = var_4891_to_fp16)[name = tensor("aw_chunk_737_cast_fp16")]; tensor var_4893_to_fp16 = const()[name = tensor("op_4893_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_739_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_739_cast_fp16, y = var_4893_to_fp16)[name = tensor("aw_chunk_739_cast_fp16")]; tensor var_4895_to_fp16 = const()[name = tensor("op_4895_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_741_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_741_cast_fp16, y = var_4895_to_fp16)[name = tensor("aw_chunk_741_cast_fp16")]; tensor var_4897_to_fp16 = const()[name = tensor("op_4897_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_743_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_743_cast_fp16, y = var_4897_to_fp16)[name = tensor("aw_chunk_743_cast_fp16")]; tensor var_4899_to_fp16 = const()[name = tensor("op_4899_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_745_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_745_cast_fp16, y = var_4899_to_fp16)[name = tensor("aw_chunk_745_cast_fp16")]; tensor var_4901_to_fp16 = const()[name = tensor("op_4901_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_747_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_747_cast_fp16, y = var_4901_to_fp16)[name = tensor("aw_chunk_747_cast_fp16")]; tensor var_4903_to_fp16 = const()[name = tensor("op_4903_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_749_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_749_cast_fp16, y = var_4903_to_fp16)[name = tensor("aw_chunk_749_cast_fp16")]; tensor var_4905_to_fp16 = const()[name = tensor("op_4905_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_751_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_751_cast_fp16, y = var_4905_to_fp16)[name = tensor("aw_chunk_751_cast_fp16")]; tensor var_4907_to_fp16 = const()[name = tensor("op_4907_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_753_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_753_cast_fp16, y = var_4907_to_fp16)[name = tensor("aw_chunk_753_cast_fp16")]; tensor var_4909_to_fp16 = const()[name = tensor("op_4909_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_755_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_755_cast_fp16, y = var_4909_to_fp16)[name = tensor("aw_chunk_755_cast_fp16")]; tensor var_4911_to_fp16 = const()[name = tensor("op_4911_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_757_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_757_cast_fp16, y = var_4911_to_fp16)[name = tensor("aw_chunk_757_cast_fp16")]; tensor var_4913_to_fp16 = const()[name = tensor("op_4913_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_759_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_759_cast_fp16, y = var_4913_to_fp16)[name = tensor("aw_chunk_759_cast_fp16")]; tensor var_4915_to_fp16 = const()[name = tensor("op_4915_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_761_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_761_cast_fp16, y = var_4915_to_fp16)[name = tensor("aw_chunk_761_cast_fp16")]; tensor var_4917_to_fp16 = const()[name = tensor("op_4917_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_763_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_763_cast_fp16, y = var_4917_to_fp16)[name = tensor("aw_chunk_763_cast_fp16")]; tensor var_4919_to_fp16 = const()[name = tensor("op_4919_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_765_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_765_cast_fp16, y = var_4919_to_fp16)[name = tensor("aw_chunk_765_cast_fp16")]; tensor var_4921_to_fp16 = const()[name = tensor("op_4921_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_767_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_767_cast_fp16, y = var_4921_to_fp16)[name = tensor("aw_chunk_767_cast_fp16")]; tensor var_4923_to_fp16 = const()[name = tensor("op_4923_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_769_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_769_cast_fp16, y = var_4923_to_fp16)[name = tensor("aw_chunk_769_cast_fp16")]; tensor var_4925_to_fp16 = const()[name = tensor("op_4925_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_771_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_771_cast_fp16, y = var_4925_to_fp16)[name = tensor("aw_chunk_771_cast_fp16")]; tensor var_4927_to_fp16 = const()[name = tensor("op_4927_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_773_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_773_cast_fp16, y = var_4927_to_fp16)[name = tensor("aw_chunk_773_cast_fp16")]; tensor var_4929_to_fp16 = const()[name = tensor("op_4929_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_775_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_775_cast_fp16, y = var_4929_to_fp16)[name = tensor("aw_chunk_775_cast_fp16")]; tensor var_4931_to_fp16 = const()[name = tensor("op_4931_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_777_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_777_cast_fp16, y = var_4931_to_fp16)[name = tensor("aw_chunk_777_cast_fp16")]; tensor var_4933_to_fp16 = const()[name = tensor("op_4933_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_779_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_779_cast_fp16, y = var_4933_to_fp16)[name = tensor("aw_chunk_779_cast_fp16")]; tensor var_4935_to_fp16 = const()[name = tensor("op_4935_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_781_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_781_cast_fp16, y = var_4935_to_fp16)[name = tensor("aw_chunk_781_cast_fp16")]; tensor var_4937_to_fp16 = const()[name = tensor("op_4937_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_783_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_783_cast_fp16, y = var_4937_to_fp16)[name = tensor("aw_chunk_783_cast_fp16")]; tensor var_4939_to_fp16 = const()[name = tensor("op_4939_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_785_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_785_cast_fp16, y = var_4939_to_fp16)[name = tensor("aw_chunk_785_cast_fp16")]; tensor var_4941_to_fp16 = const()[name = tensor("op_4941_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_787_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_787_cast_fp16, y = var_4941_to_fp16)[name = tensor("aw_chunk_787_cast_fp16")]; tensor var_4943_to_fp16 = const()[name = tensor("op_4943_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_789_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_789_cast_fp16, y = var_4943_to_fp16)[name = tensor("aw_chunk_789_cast_fp16")]; tensor var_4945_to_fp16 = const()[name = tensor("op_4945_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_791_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_791_cast_fp16, y = var_4945_to_fp16)[name = tensor("aw_chunk_791_cast_fp16")]; tensor var_4947_to_fp16 = const()[name = tensor("op_4947_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_793_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_793_cast_fp16, y = var_4947_to_fp16)[name = tensor("aw_chunk_793_cast_fp16")]; tensor var_4949_to_fp16 = const()[name = tensor("op_4949_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_795_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_795_cast_fp16, y = var_4949_to_fp16)[name = tensor("aw_chunk_795_cast_fp16")]; tensor var_4951_to_fp16 = const()[name = tensor("op_4951_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_797_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_797_cast_fp16, y = var_4951_to_fp16)[name = tensor("aw_chunk_797_cast_fp16")]; tensor var_4953_to_fp16 = const()[name = tensor("op_4953_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_799_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_799_cast_fp16, y = var_4953_to_fp16)[name = tensor("aw_chunk_799_cast_fp16")]; tensor var_4955_to_fp16 = const()[name = tensor("op_4955_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_801_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_801_cast_fp16, y = var_4955_to_fp16)[name = tensor("aw_chunk_801_cast_fp16")]; tensor var_4957_to_fp16 = const()[name = tensor("op_4957_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_803_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_803_cast_fp16, y = var_4957_to_fp16)[name = tensor("aw_chunk_803_cast_fp16")]; tensor var_4959_to_fp16 = const()[name = tensor("op_4959_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_805_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_805_cast_fp16, y = var_4959_to_fp16)[name = tensor("aw_chunk_805_cast_fp16")]; tensor var_4961_to_fp16 = const()[name = tensor("op_4961_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_807_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_807_cast_fp16, y = var_4961_to_fp16)[name = tensor("aw_chunk_807_cast_fp16")]; tensor var_4963_to_fp16 = const()[name = tensor("op_4963_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_809_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_809_cast_fp16, y = var_4963_to_fp16)[name = tensor("aw_chunk_809_cast_fp16")]; tensor var_4965_to_fp16 = const()[name = tensor("op_4965_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_811_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_811_cast_fp16, y = var_4965_to_fp16)[name = tensor("aw_chunk_811_cast_fp16")]; tensor var_4967_to_fp16 = const()[name = tensor("op_4967_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_813_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_813_cast_fp16, y = var_4967_to_fp16)[name = tensor("aw_chunk_813_cast_fp16")]; tensor var_4969_to_fp16 = const()[name = tensor("op_4969_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_815_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_815_cast_fp16, y = var_4969_to_fp16)[name = tensor("aw_chunk_815_cast_fp16")]; tensor var_4971_to_fp16 = const()[name = tensor("op_4971_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_817_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_817_cast_fp16, y = var_4971_to_fp16)[name = tensor("aw_chunk_817_cast_fp16")]; tensor var_4973_to_fp16 = const()[name = tensor("op_4973_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_819_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_819_cast_fp16, y = var_4973_to_fp16)[name = tensor("aw_chunk_819_cast_fp16")]; tensor var_4975_to_fp16 = const()[name = tensor("op_4975_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_821_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_821_cast_fp16, y = var_4975_to_fp16)[name = tensor("aw_chunk_821_cast_fp16")]; tensor var_4977_to_fp16 = const()[name = tensor("op_4977_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_823_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_823_cast_fp16, y = var_4977_to_fp16)[name = tensor("aw_chunk_823_cast_fp16")]; tensor var_4979_to_fp16 = const()[name = tensor("op_4979_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_825_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_825_cast_fp16, y = var_4979_to_fp16)[name = tensor("aw_chunk_825_cast_fp16")]; tensor var_4981_to_fp16 = const()[name = tensor("op_4981_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_827_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_827_cast_fp16, y = var_4981_to_fp16)[name = tensor("aw_chunk_827_cast_fp16")]; tensor var_4983_to_fp16 = const()[name = tensor("op_4983_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_829_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_829_cast_fp16, y = var_4983_to_fp16)[name = tensor("aw_chunk_829_cast_fp16")]; tensor var_4985_to_fp16 = const()[name = tensor("op_4985_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_831_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_831_cast_fp16, y = var_4985_to_fp16)[name = tensor("aw_chunk_831_cast_fp16")]; tensor var_4987_to_fp16 = const()[name = tensor("op_4987_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_833_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_833_cast_fp16, y = var_4987_to_fp16)[name = tensor("aw_chunk_833_cast_fp16")]; tensor var_4989_to_fp16 = const()[name = tensor("op_4989_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_835_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_835_cast_fp16, y = var_4989_to_fp16)[name = tensor("aw_chunk_835_cast_fp16")]; tensor var_4991_to_fp16 = const()[name = tensor("op_4991_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_837_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_837_cast_fp16, y = var_4991_to_fp16)[name = tensor("aw_chunk_837_cast_fp16")]; tensor var_4993_to_fp16 = const()[name = tensor("op_4993_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_839_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_839_cast_fp16, y = var_4993_to_fp16)[name = tensor("aw_chunk_839_cast_fp16")]; tensor var_4995_to_fp16 = const()[name = tensor("op_4995_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_841_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_841_cast_fp16, y = var_4995_to_fp16)[name = tensor("aw_chunk_841_cast_fp16")]; tensor var_4997_to_fp16 = const()[name = tensor("op_4997_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_843_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_843_cast_fp16, y = var_4997_to_fp16)[name = tensor("aw_chunk_843_cast_fp16")]; tensor var_4999_to_fp16 = const()[name = tensor("op_4999_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_845_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_845_cast_fp16, y = var_4999_to_fp16)[name = tensor("aw_chunk_845_cast_fp16")]; tensor var_5001_to_fp16 = const()[name = tensor("op_5001_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_847_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_847_cast_fp16, y = var_5001_to_fp16)[name = tensor("aw_chunk_847_cast_fp16")]; tensor var_5003_to_fp16 = const()[name = tensor("op_5003_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_849_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_849_cast_fp16, y = var_5003_to_fp16)[name = tensor("aw_chunk_849_cast_fp16")]; tensor var_5005_to_fp16 = const()[name = tensor("op_5005_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_851_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_851_cast_fp16, y = var_5005_to_fp16)[name = tensor("aw_chunk_851_cast_fp16")]; tensor var_5007_to_fp16 = const()[name = tensor("op_5007_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_853_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_853_cast_fp16, y = var_5007_to_fp16)[name = tensor("aw_chunk_853_cast_fp16")]; tensor var_5009_to_fp16 = const()[name = tensor("op_5009_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_855_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_855_cast_fp16, y = var_5009_to_fp16)[name = tensor("aw_chunk_855_cast_fp16")]; tensor var_5011_to_fp16 = const()[name = tensor("op_5011_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_857_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_857_cast_fp16, y = var_5011_to_fp16)[name = tensor("aw_chunk_857_cast_fp16")]; tensor var_5013_to_fp16 = const()[name = tensor("op_5013_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_859_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_859_cast_fp16, y = var_5013_to_fp16)[name = tensor("aw_chunk_859_cast_fp16")]; tensor var_5015_to_fp16 = const()[name = tensor("op_5015_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_861_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_861_cast_fp16, y = var_5015_to_fp16)[name = tensor("aw_chunk_861_cast_fp16")]; tensor var_5017_to_fp16 = const()[name = tensor("op_5017_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_863_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_863_cast_fp16, y = var_5017_to_fp16)[name = tensor("aw_chunk_863_cast_fp16")]; tensor var_5019_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_721_cast_fp16)[name = tensor("op_5019_cast_fp16")]; tensor var_5020_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_723_cast_fp16)[name = tensor("op_5020_cast_fp16")]; tensor var_5021_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_725_cast_fp16)[name = tensor("op_5021_cast_fp16")]; tensor var_5022_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_727_cast_fp16)[name = tensor("op_5022_cast_fp16")]; tensor var_5023_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_729_cast_fp16)[name = tensor("op_5023_cast_fp16")]; tensor var_5024_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_731_cast_fp16)[name = tensor("op_5024_cast_fp16")]; tensor var_5025_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_733_cast_fp16)[name = tensor("op_5025_cast_fp16")]; tensor var_5026_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_735_cast_fp16)[name = tensor("op_5026_cast_fp16")]; tensor var_5027_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_737_cast_fp16)[name = tensor("op_5027_cast_fp16")]; tensor var_5028_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_739_cast_fp16)[name = tensor("op_5028_cast_fp16")]; tensor var_5029_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_741_cast_fp16)[name = tensor("op_5029_cast_fp16")]; tensor var_5030_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_743_cast_fp16)[name = tensor("op_5030_cast_fp16")]; tensor var_5031_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_745_cast_fp16)[name = tensor("op_5031_cast_fp16")]; tensor var_5032_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_747_cast_fp16)[name = tensor("op_5032_cast_fp16")]; tensor var_5033_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_749_cast_fp16)[name = tensor("op_5033_cast_fp16")]; tensor var_5034_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_751_cast_fp16)[name = tensor("op_5034_cast_fp16")]; tensor var_5035_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_753_cast_fp16)[name = tensor("op_5035_cast_fp16")]; tensor var_5036_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_755_cast_fp16)[name = tensor("op_5036_cast_fp16")]; tensor var_5037_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_757_cast_fp16)[name = tensor("op_5037_cast_fp16")]; tensor var_5038_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_759_cast_fp16)[name = tensor("op_5038_cast_fp16")]; tensor var_5039_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_761_cast_fp16)[name = tensor("op_5039_cast_fp16")]; tensor var_5040_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_763_cast_fp16)[name = tensor("op_5040_cast_fp16")]; tensor var_5041_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_765_cast_fp16)[name = tensor("op_5041_cast_fp16")]; tensor var_5042_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_767_cast_fp16)[name = tensor("op_5042_cast_fp16")]; tensor var_5043_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_769_cast_fp16)[name = tensor("op_5043_cast_fp16")]; tensor var_5044_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_771_cast_fp16)[name = tensor("op_5044_cast_fp16")]; tensor var_5045_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_773_cast_fp16)[name = tensor("op_5045_cast_fp16")]; tensor var_5046_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_775_cast_fp16)[name = tensor("op_5046_cast_fp16")]; tensor var_5047_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_777_cast_fp16)[name = tensor("op_5047_cast_fp16")]; tensor var_5048_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_779_cast_fp16)[name = tensor("op_5048_cast_fp16")]; tensor var_5049_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_781_cast_fp16)[name = tensor("op_5049_cast_fp16")]; tensor var_5050_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_783_cast_fp16)[name = tensor("op_5050_cast_fp16")]; tensor var_5051_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_785_cast_fp16)[name = tensor("op_5051_cast_fp16")]; tensor var_5052_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_787_cast_fp16)[name = tensor("op_5052_cast_fp16")]; tensor var_5053_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_789_cast_fp16)[name = tensor("op_5053_cast_fp16")]; tensor var_5054_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_791_cast_fp16)[name = tensor("op_5054_cast_fp16")]; tensor var_5055_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_793_cast_fp16)[name = tensor("op_5055_cast_fp16")]; tensor var_5056_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_795_cast_fp16)[name = tensor("op_5056_cast_fp16")]; tensor var_5057_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_797_cast_fp16)[name = tensor("op_5057_cast_fp16")]; tensor var_5058_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_799_cast_fp16)[name = tensor("op_5058_cast_fp16")]; tensor var_5059_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_801_cast_fp16)[name = tensor("op_5059_cast_fp16")]; tensor var_5060_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_803_cast_fp16)[name = tensor("op_5060_cast_fp16")]; tensor var_5061_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_805_cast_fp16)[name = tensor("op_5061_cast_fp16")]; tensor var_5062_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_807_cast_fp16)[name = tensor("op_5062_cast_fp16")]; tensor var_5063_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_809_cast_fp16)[name = tensor("op_5063_cast_fp16")]; tensor var_5064_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_811_cast_fp16)[name = tensor("op_5064_cast_fp16")]; tensor var_5065_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_813_cast_fp16)[name = tensor("op_5065_cast_fp16")]; tensor var_5066_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_815_cast_fp16)[name = tensor("op_5066_cast_fp16")]; tensor var_5067_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_817_cast_fp16)[name = tensor("op_5067_cast_fp16")]; tensor var_5068_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_819_cast_fp16)[name = tensor("op_5068_cast_fp16")]; tensor var_5069_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_821_cast_fp16)[name = tensor("op_5069_cast_fp16")]; tensor var_5070_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_823_cast_fp16)[name = tensor("op_5070_cast_fp16")]; tensor var_5071_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_825_cast_fp16)[name = tensor("op_5071_cast_fp16")]; tensor var_5072_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_827_cast_fp16)[name = tensor("op_5072_cast_fp16")]; tensor var_5073_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_829_cast_fp16)[name = tensor("op_5073_cast_fp16")]; tensor var_5074_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_831_cast_fp16)[name = tensor("op_5074_cast_fp16")]; tensor var_5075_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_833_cast_fp16)[name = tensor("op_5075_cast_fp16")]; tensor var_5076_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_835_cast_fp16)[name = tensor("op_5076_cast_fp16")]; tensor var_5077_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_837_cast_fp16)[name = tensor("op_5077_cast_fp16")]; tensor var_5078_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_839_cast_fp16)[name = tensor("op_5078_cast_fp16")]; tensor var_5079_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_841_cast_fp16)[name = tensor("op_5079_cast_fp16")]; tensor var_5080_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_843_cast_fp16)[name = tensor("op_5080_cast_fp16")]; tensor var_5081_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_845_cast_fp16)[name = tensor("op_5081_cast_fp16")]; tensor var_5082_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_847_cast_fp16)[name = tensor("op_5082_cast_fp16")]; tensor var_5083_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_849_cast_fp16)[name = tensor("op_5083_cast_fp16")]; tensor var_5084_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_851_cast_fp16)[name = tensor("op_5084_cast_fp16")]; tensor var_5085_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_853_cast_fp16)[name = tensor("op_5085_cast_fp16")]; tensor var_5086_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_855_cast_fp16)[name = tensor("op_5086_cast_fp16")]; tensor var_5087_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_857_cast_fp16)[name = tensor("op_5087_cast_fp16")]; tensor var_5088_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_859_cast_fp16)[name = tensor("op_5088_cast_fp16")]; tensor var_5089_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_861_cast_fp16)[name = tensor("op_5089_cast_fp16")]; tensor var_5090_cast_fp16 = softmax(axis = var_4463, x = aw_chunk_863_cast_fp16)[name = tensor("op_5090_cast_fp16")]; tensor var_5092_equation_0 = const()[name = tensor("op_5092_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5092_cast_fp16 = einsum(equation = var_5092_equation_0, values = (var_4684_cast_fp16, var_5019_cast_fp16))[name = tensor("op_5092_cast_fp16")]; tensor var_5094_equation_0 = const()[name = tensor("op_5094_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5094_cast_fp16 = einsum(equation = var_5094_equation_0, values = (var_4684_cast_fp16, var_5020_cast_fp16))[name = tensor("op_5094_cast_fp16")]; tensor var_5096_equation_0 = const()[name = tensor("op_5096_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5096_cast_fp16 = einsum(equation = var_5096_equation_0, values = (var_4684_cast_fp16, var_5021_cast_fp16))[name = tensor("op_5096_cast_fp16")]; tensor var_5098_equation_0 = const()[name = tensor("op_5098_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5098_cast_fp16 = einsum(equation = var_5098_equation_0, values = (var_4684_cast_fp16, var_5022_cast_fp16))[name = tensor("op_5098_cast_fp16")]; tensor var_5100_equation_0 = const()[name = tensor("op_5100_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5100_cast_fp16 = einsum(equation = var_5100_equation_0, values = (var_4684_cast_fp16, var_5023_cast_fp16))[name = tensor("op_5100_cast_fp16")]; tensor var_5102_equation_0 = const()[name = tensor("op_5102_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5102_cast_fp16 = einsum(equation = var_5102_equation_0, values = (var_4684_cast_fp16, var_5024_cast_fp16))[name = tensor("op_5102_cast_fp16")]; tensor var_5104_equation_0 = const()[name = tensor("op_5104_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5104_cast_fp16 = einsum(equation = var_5104_equation_0, values = (var_4688_cast_fp16, var_5025_cast_fp16))[name = tensor("op_5104_cast_fp16")]; tensor var_5106_equation_0 = const()[name = tensor("op_5106_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5106_cast_fp16 = einsum(equation = var_5106_equation_0, values = (var_4688_cast_fp16, var_5026_cast_fp16))[name = tensor("op_5106_cast_fp16")]; tensor var_5108_equation_0 = const()[name = tensor("op_5108_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5108_cast_fp16 = einsum(equation = var_5108_equation_0, values = (var_4688_cast_fp16, var_5027_cast_fp16))[name = tensor("op_5108_cast_fp16")]; tensor var_5110_equation_0 = const()[name = tensor("op_5110_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5110_cast_fp16 = einsum(equation = var_5110_equation_0, values = (var_4688_cast_fp16, var_5028_cast_fp16))[name = tensor("op_5110_cast_fp16")]; tensor var_5112_equation_0 = const()[name = tensor("op_5112_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5112_cast_fp16 = einsum(equation = var_5112_equation_0, values = (var_4688_cast_fp16, var_5029_cast_fp16))[name = tensor("op_5112_cast_fp16")]; tensor var_5114_equation_0 = const()[name = tensor("op_5114_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5114_cast_fp16 = einsum(equation = var_5114_equation_0, values = (var_4688_cast_fp16, var_5030_cast_fp16))[name = tensor("op_5114_cast_fp16")]; tensor var_5116_equation_0 = const()[name = tensor("op_5116_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5116_cast_fp16 = einsum(equation = var_5116_equation_0, values = (var_4692_cast_fp16, var_5031_cast_fp16))[name = tensor("op_5116_cast_fp16")]; tensor var_5118_equation_0 = const()[name = tensor("op_5118_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5118_cast_fp16 = einsum(equation = var_5118_equation_0, values = (var_4692_cast_fp16, var_5032_cast_fp16))[name = tensor("op_5118_cast_fp16")]; tensor var_5120_equation_0 = const()[name = tensor("op_5120_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5120_cast_fp16 = einsum(equation = var_5120_equation_0, values = (var_4692_cast_fp16, var_5033_cast_fp16))[name = tensor("op_5120_cast_fp16")]; tensor var_5122_equation_0 = const()[name = tensor("op_5122_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5122_cast_fp16 = einsum(equation = var_5122_equation_0, values = (var_4692_cast_fp16, var_5034_cast_fp16))[name = tensor("op_5122_cast_fp16")]; tensor var_5124_equation_0 = const()[name = tensor("op_5124_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5124_cast_fp16 = einsum(equation = var_5124_equation_0, values = (var_4692_cast_fp16, var_5035_cast_fp16))[name = tensor("op_5124_cast_fp16")]; tensor var_5126_equation_0 = const()[name = tensor("op_5126_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5126_cast_fp16 = einsum(equation = var_5126_equation_0, values = (var_4692_cast_fp16, var_5036_cast_fp16))[name = tensor("op_5126_cast_fp16")]; tensor var_5128_equation_0 = const()[name = tensor("op_5128_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5128_cast_fp16 = einsum(equation = var_5128_equation_0, values = (var_4696_cast_fp16, var_5037_cast_fp16))[name = tensor("op_5128_cast_fp16")]; tensor var_5130_equation_0 = const()[name = tensor("op_5130_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5130_cast_fp16 = einsum(equation = var_5130_equation_0, values = (var_4696_cast_fp16, var_5038_cast_fp16))[name = tensor("op_5130_cast_fp16")]; tensor var_5132_equation_0 = const()[name = tensor("op_5132_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5132_cast_fp16 = einsum(equation = var_5132_equation_0, values = (var_4696_cast_fp16, var_5039_cast_fp16))[name = tensor("op_5132_cast_fp16")]; tensor var_5134_equation_0 = const()[name = tensor("op_5134_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5134_cast_fp16 = einsum(equation = var_5134_equation_0, values = (var_4696_cast_fp16, var_5040_cast_fp16))[name = tensor("op_5134_cast_fp16")]; tensor var_5136_equation_0 = const()[name = tensor("op_5136_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5136_cast_fp16 = einsum(equation = var_5136_equation_0, values = (var_4696_cast_fp16, var_5041_cast_fp16))[name = tensor("op_5136_cast_fp16")]; tensor var_5138_equation_0 = const()[name = tensor("op_5138_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5138_cast_fp16 = einsum(equation = var_5138_equation_0, values = (var_4696_cast_fp16, var_5042_cast_fp16))[name = tensor("op_5138_cast_fp16")]; tensor var_5140_equation_0 = const()[name = tensor("op_5140_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5140_cast_fp16 = einsum(equation = var_5140_equation_0, values = (var_4700_cast_fp16, var_5043_cast_fp16))[name = tensor("op_5140_cast_fp16")]; tensor var_5142_equation_0 = const()[name = tensor("op_5142_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5142_cast_fp16 = einsum(equation = var_5142_equation_0, values = (var_4700_cast_fp16, var_5044_cast_fp16))[name = tensor("op_5142_cast_fp16")]; tensor var_5144_equation_0 = const()[name = tensor("op_5144_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5144_cast_fp16 = einsum(equation = var_5144_equation_0, values = (var_4700_cast_fp16, var_5045_cast_fp16))[name = tensor("op_5144_cast_fp16")]; tensor var_5146_equation_0 = const()[name = tensor("op_5146_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5146_cast_fp16 = einsum(equation = var_5146_equation_0, values = (var_4700_cast_fp16, var_5046_cast_fp16))[name = tensor("op_5146_cast_fp16")]; tensor var_5148_equation_0 = const()[name = tensor("op_5148_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5148_cast_fp16 = einsum(equation = var_5148_equation_0, values = (var_4700_cast_fp16, var_5047_cast_fp16))[name = tensor("op_5148_cast_fp16")]; tensor var_5150_equation_0 = const()[name = tensor("op_5150_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5150_cast_fp16 = einsum(equation = var_5150_equation_0, values = (var_4700_cast_fp16, var_5048_cast_fp16))[name = tensor("op_5150_cast_fp16")]; tensor var_5152_equation_0 = const()[name = tensor("op_5152_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5152_cast_fp16 = einsum(equation = var_5152_equation_0, values = (var_4704_cast_fp16, var_5049_cast_fp16))[name = tensor("op_5152_cast_fp16")]; tensor var_5154_equation_0 = const()[name = tensor("op_5154_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5154_cast_fp16 = einsum(equation = var_5154_equation_0, values = (var_4704_cast_fp16, var_5050_cast_fp16))[name = tensor("op_5154_cast_fp16")]; tensor var_5156_equation_0 = const()[name = tensor("op_5156_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5156_cast_fp16 = einsum(equation = var_5156_equation_0, values = (var_4704_cast_fp16, var_5051_cast_fp16))[name = tensor("op_5156_cast_fp16")]; tensor var_5158_equation_0 = const()[name = tensor("op_5158_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5158_cast_fp16 = einsum(equation = var_5158_equation_0, values = (var_4704_cast_fp16, var_5052_cast_fp16))[name = tensor("op_5158_cast_fp16")]; tensor var_5160_equation_0 = const()[name = tensor("op_5160_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5160_cast_fp16 = einsum(equation = var_5160_equation_0, values = (var_4704_cast_fp16, var_5053_cast_fp16))[name = tensor("op_5160_cast_fp16")]; tensor var_5162_equation_0 = const()[name = tensor("op_5162_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5162_cast_fp16 = einsum(equation = var_5162_equation_0, values = (var_4704_cast_fp16, var_5054_cast_fp16))[name = tensor("op_5162_cast_fp16")]; tensor var_5164_equation_0 = const()[name = tensor("op_5164_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5164_cast_fp16 = einsum(equation = var_5164_equation_0, values = (var_4708_cast_fp16, var_5055_cast_fp16))[name = tensor("op_5164_cast_fp16")]; tensor var_5166_equation_0 = const()[name = tensor("op_5166_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5166_cast_fp16 = einsum(equation = var_5166_equation_0, values = (var_4708_cast_fp16, var_5056_cast_fp16))[name = tensor("op_5166_cast_fp16")]; tensor var_5168_equation_0 = const()[name = tensor("op_5168_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5168_cast_fp16 = einsum(equation = var_5168_equation_0, values = (var_4708_cast_fp16, var_5057_cast_fp16))[name = tensor("op_5168_cast_fp16")]; tensor var_5170_equation_0 = const()[name = tensor("op_5170_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5170_cast_fp16 = einsum(equation = var_5170_equation_0, values = (var_4708_cast_fp16, var_5058_cast_fp16))[name = tensor("op_5170_cast_fp16")]; tensor var_5172_equation_0 = const()[name = tensor("op_5172_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5172_cast_fp16 = einsum(equation = var_5172_equation_0, values = (var_4708_cast_fp16, var_5059_cast_fp16))[name = tensor("op_5172_cast_fp16")]; tensor var_5174_equation_0 = const()[name = tensor("op_5174_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5174_cast_fp16 = einsum(equation = var_5174_equation_0, values = (var_4708_cast_fp16, var_5060_cast_fp16))[name = tensor("op_5174_cast_fp16")]; tensor var_5176_equation_0 = const()[name = tensor("op_5176_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5176_cast_fp16 = einsum(equation = var_5176_equation_0, values = (var_4712_cast_fp16, var_5061_cast_fp16))[name = tensor("op_5176_cast_fp16")]; tensor var_5178_equation_0 = const()[name = tensor("op_5178_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5178_cast_fp16 = einsum(equation = var_5178_equation_0, values = (var_4712_cast_fp16, var_5062_cast_fp16))[name = tensor("op_5178_cast_fp16")]; tensor var_5180_equation_0 = const()[name = tensor("op_5180_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5180_cast_fp16 = einsum(equation = var_5180_equation_0, values = (var_4712_cast_fp16, var_5063_cast_fp16))[name = tensor("op_5180_cast_fp16")]; tensor var_5182_equation_0 = const()[name = tensor("op_5182_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5182_cast_fp16 = einsum(equation = var_5182_equation_0, values = (var_4712_cast_fp16, var_5064_cast_fp16))[name = tensor("op_5182_cast_fp16")]; tensor var_5184_equation_0 = const()[name = tensor("op_5184_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5184_cast_fp16 = einsum(equation = var_5184_equation_0, values = (var_4712_cast_fp16, var_5065_cast_fp16))[name = tensor("op_5184_cast_fp16")]; tensor var_5186_equation_0 = const()[name = tensor("op_5186_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5186_cast_fp16 = einsum(equation = var_5186_equation_0, values = (var_4712_cast_fp16, var_5066_cast_fp16))[name = tensor("op_5186_cast_fp16")]; tensor var_5188_equation_0 = const()[name = tensor("op_5188_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5188_cast_fp16 = einsum(equation = var_5188_equation_0, values = (var_4716_cast_fp16, var_5067_cast_fp16))[name = tensor("op_5188_cast_fp16")]; tensor var_5190_equation_0 = const()[name = tensor("op_5190_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5190_cast_fp16 = einsum(equation = var_5190_equation_0, values = (var_4716_cast_fp16, var_5068_cast_fp16))[name = tensor("op_5190_cast_fp16")]; tensor var_5192_equation_0 = const()[name = tensor("op_5192_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5192_cast_fp16 = einsum(equation = var_5192_equation_0, values = (var_4716_cast_fp16, var_5069_cast_fp16))[name = tensor("op_5192_cast_fp16")]; tensor var_5194_equation_0 = const()[name = tensor("op_5194_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5194_cast_fp16 = einsum(equation = var_5194_equation_0, values = (var_4716_cast_fp16, var_5070_cast_fp16))[name = tensor("op_5194_cast_fp16")]; tensor var_5196_equation_0 = const()[name = tensor("op_5196_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5196_cast_fp16 = einsum(equation = var_5196_equation_0, values = (var_4716_cast_fp16, var_5071_cast_fp16))[name = tensor("op_5196_cast_fp16")]; tensor var_5198_equation_0 = const()[name = tensor("op_5198_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5198_cast_fp16 = einsum(equation = var_5198_equation_0, values = (var_4716_cast_fp16, var_5072_cast_fp16))[name = tensor("op_5198_cast_fp16")]; tensor var_5200_equation_0 = const()[name = tensor("op_5200_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5200_cast_fp16 = einsum(equation = var_5200_equation_0, values = (var_4720_cast_fp16, var_5073_cast_fp16))[name = tensor("op_5200_cast_fp16")]; tensor var_5202_equation_0 = const()[name = tensor("op_5202_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5202_cast_fp16 = einsum(equation = var_5202_equation_0, values = (var_4720_cast_fp16, var_5074_cast_fp16))[name = tensor("op_5202_cast_fp16")]; tensor var_5204_equation_0 = const()[name = tensor("op_5204_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5204_cast_fp16 = einsum(equation = var_5204_equation_0, values = (var_4720_cast_fp16, var_5075_cast_fp16))[name = tensor("op_5204_cast_fp16")]; tensor var_5206_equation_0 = const()[name = tensor("op_5206_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5206_cast_fp16 = einsum(equation = var_5206_equation_0, values = (var_4720_cast_fp16, var_5076_cast_fp16))[name = tensor("op_5206_cast_fp16")]; tensor var_5208_equation_0 = const()[name = tensor("op_5208_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5208_cast_fp16 = einsum(equation = var_5208_equation_0, values = (var_4720_cast_fp16, var_5077_cast_fp16))[name = tensor("op_5208_cast_fp16")]; tensor var_5210_equation_0 = const()[name = tensor("op_5210_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5210_cast_fp16 = einsum(equation = var_5210_equation_0, values = (var_4720_cast_fp16, var_5078_cast_fp16))[name = tensor("op_5210_cast_fp16")]; tensor var_5212_equation_0 = const()[name = tensor("op_5212_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5212_cast_fp16 = einsum(equation = var_5212_equation_0, values = (var_4724_cast_fp16, var_5079_cast_fp16))[name = tensor("op_5212_cast_fp16")]; tensor var_5214_equation_0 = const()[name = tensor("op_5214_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5214_cast_fp16 = einsum(equation = var_5214_equation_0, values = (var_4724_cast_fp16, var_5080_cast_fp16))[name = tensor("op_5214_cast_fp16")]; tensor var_5216_equation_0 = const()[name = tensor("op_5216_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5216_cast_fp16 = einsum(equation = var_5216_equation_0, values = (var_4724_cast_fp16, var_5081_cast_fp16))[name = tensor("op_5216_cast_fp16")]; tensor var_5218_equation_0 = const()[name = tensor("op_5218_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5218_cast_fp16 = einsum(equation = var_5218_equation_0, values = (var_4724_cast_fp16, var_5082_cast_fp16))[name = tensor("op_5218_cast_fp16")]; tensor var_5220_equation_0 = const()[name = tensor("op_5220_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5220_cast_fp16 = einsum(equation = var_5220_equation_0, values = (var_4724_cast_fp16, var_5083_cast_fp16))[name = tensor("op_5220_cast_fp16")]; tensor var_5222_equation_0 = const()[name = tensor("op_5222_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5222_cast_fp16 = einsum(equation = var_5222_equation_0, values = (var_4724_cast_fp16, var_5084_cast_fp16))[name = tensor("op_5222_cast_fp16")]; tensor var_5224_equation_0 = const()[name = tensor("op_5224_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5224_cast_fp16 = einsum(equation = var_5224_equation_0, values = (var_4728_cast_fp16, var_5085_cast_fp16))[name = tensor("op_5224_cast_fp16")]; tensor var_5226_equation_0 = const()[name = tensor("op_5226_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5226_cast_fp16 = einsum(equation = var_5226_equation_0, values = (var_4728_cast_fp16, var_5086_cast_fp16))[name = tensor("op_5226_cast_fp16")]; tensor var_5228_equation_0 = const()[name = tensor("op_5228_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5228_cast_fp16 = einsum(equation = var_5228_equation_0, values = (var_4728_cast_fp16, var_5087_cast_fp16))[name = tensor("op_5228_cast_fp16")]; tensor var_5230_equation_0 = const()[name = tensor("op_5230_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5230_cast_fp16 = einsum(equation = var_5230_equation_0, values = (var_4728_cast_fp16, var_5088_cast_fp16))[name = tensor("op_5230_cast_fp16")]; tensor var_5232_equation_0 = const()[name = tensor("op_5232_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5232_cast_fp16 = einsum(equation = var_5232_equation_0, values = (var_4728_cast_fp16, var_5089_cast_fp16))[name = tensor("op_5232_cast_fp16")]; tensor var_5234_equation_0 = const()[name = tensor("op_5234_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5234_cast_fp16 = einsum(equation = var_5234_equation_0, values = (var_4728_cast_fp16, var_5090_cast_fp16))[name = tensor("op_5234_cast_fp16")]; tensor var_5236_interleave_0 = const()[name = tensor("op_5236_interleave_0"), val = tensor(false)]; tensor var_5236_cast_fp16 = concat(axis = var_4447, interleave = var_5236_interleave_0, values = (var_5092_cast_fp16, var_5094_cast_fp16, var_5096_cast_fp16, var_5098_cast_fp16, var_5100_cast_fp16, var_5102_cast_fp16))[name = tensor("op_5236_cast_fp16")]; tensor var_5238_interleave_0 = const()[name = tensor("op_5238_interleave_0"), val = tensor(false)]; tensor var_5238_cast_fp16 = concat(axis = var_4447, interleave = var_5238_interleave_0, values = (var_5104_cast_fp16, var_5106_cast_fp16, var_5108_cast_fp16, var_5110_cast_fp16, var_5112_cast_fp16, var_5114_cast_fp16))[name = tensor("op_5238_cast_fp16")]; tensor var_5240_interleave_0 = const()[name = tensor("op_5240_interleave_0"), val = tensor(false)]; tensor var_5240_cast_fp16 = concat(axis = var_4447, interleave = var_5240_interleave_0, values = (var_5116_cast_fp16, var_5118_cast_fp16, var_5120_cast_fp16, var_5122_cast_fp16, var_5124_cast_fp16, var_5126_cast_fp16))[name = tensor("op_5240_cast_fp16")]; tensor var_5242_interleave_0 = const()[name = tensor("op_5242_interleave_0"), val = tensor(false)]; tensor var_5242_cast_fp16 = concat(axis = var_4447, interleave = var_5242_interleave_0, values = (var_5128_cast_fp16, var_5130_cast_fp16, var_5132_cast_fp16, var_5134_cast_fp16, var_5136_cast_fp16, var_5138_cast_fp16))[name = tensor("op_5242_cast_fp16")]; tensor var_5244_interleave_0 = const()[name = tensor("op_5244_interleave_0"), val = tensor(false)]; tensor var_5244_cast_fp16 = concat(axis = var_4447, interleave = var_5244_interleave_0, values = (var_5140_cast_fp16, var_5142_cast_fp16, var_5144_cast_fp16, var_5146_cast_fp16, var_5148_cast_fp16, var_5150_cast_fp16))[name = tensor("op_5244_cast_fp16")]; tensor var_5246_interleave_0 = const()[name = tensor("op_5246_interleave_0"), val = tensor(false)]; tensor var_5246_cast_fp16 = concat(axis = var_4447, interleave = var_5246_interleave_0, values = (var_5152_cast_fp16, var_5154_cast_fp16, var_5156_cast_fp16, var_5158_cast_fp16, var_5160_cast_fp16, var_5162_cast_fp16))[name = tensor("op_5246_cast_fp16")]; tensor var_5248_interleave_0 = const()[name = tensor("op_5248_interleave_0"), val = tensor(false)]; tensor var_5248_cast_fp16 = concat(axis = var_4447, interleave = var_5248_interleave_0, values = (var_5164_cast_fp16, var_5166_cast_fp16, var_5168_cast_fp16, var_5170_cast_fp16, var_5172_cast_fp16, var_5174_cast_fp16))[name = tensor("op_5248_cast_fp16")]; tensor var_5250_interleave_0 = const()[name = tensor("op_5250_interleave_0"), val = tensor(false)]; tensor var_5250_cast_fp16 = concat(axis = var_4447, interleave = var_5250_interleave_0, values = (var_5176_cast_fp16, var_5178_cast_fp16, var_5180_cast_fp16, var_5182_cast_fp16, var_5184_cast_fp16, var_5186_cast_fp16))[name = tensor("op_5250_cast_fp16")]; tensor var_5252_interleave_0 = const()[name = tensor("op_5252_interleave_0"), val = tensor(false)]; tensor var_5252_cast_fp16 = concat(axis = var_4447, interleave = var_5252_interleave_0, values = (var_5188_cast_fp16, var_5190_cast_fp16, var_5192_cast_fp16, var_5194_cast_fp16, var_5196_cast_fp16, var_5198_cast_fp16))[name = tensor("op_5252_cast_fp16")]; tensor var_5254_interleave_0 = const()[name = tensor("op_5254_interleave_0"), val = tensor(false)]; tensor var_5254_cast_fp16 = concat(axis = var_4447, interleave = var_5254_interleave_0, values = (var_5200_cast_fp16, var_5202_cast_fp16, var_5204_cast_fp16, var_5206_cast_fp16, var_5208_cast_fp16, var_5210_cast_fp16))[name = tensor("op_5254_cast_fp16")]; tensor var_5256_interleave_0 = const()[name = tensor("op_5256_interleave_0"), val = tensor(false)]; tensor var_5256_cast_fp16 = concat(axis = var_4447, interleave = var_5256_interleave_0, values = (var_5212_cast_fp16, var_5214_cast_fp16, var_5216_cast_fp16, var_5218_cast_fp16, var_5220_cast_fp16, var_5222_cast_fp16))[name = tensor("op_5256_cast_fp16")]; tensor var_5258_interleave_0 = const()[name = tensor("op_5258_interleave_0"), val = tensor(false)]; tensor var_5258_cast_fp16 = concat(axis = var_4447, interleave = var_5258_interleave_0, values = (var_5224_cast_fp16, var_5226_cast_fp16, var_5228_cast_fp16, var_5230_cast_fp16, var_5232_cast_fp16, var_5234_cast_fp16))[name = tensor("op_5258_cast_fp16")]; tensor input_41_interleave_0 = const()[name = tensor("input_41_interleave_0"), val = tensor(false)]; tensor input_41_cast_fp16 = concat(axis = var_4463, interleave = input_41_interleave_0, values = (var_5236_cast_fp16, var_5238_cast_fp16, var_5240_cast_fp16, var_5242_cast_fp16, var_5244_cast_fp16, var_5246_cast_fp16, var_5248_cast_fp16, var_5250_cast_fp16, var_5252_cast_fp16, var_5254_cast_fp16, var_5256_cast_fp16, var_5258_cast_fp16))[name = tensor("input_41_cast_fp16")]; tensor obj_23_pad_type_0 = const()[name = tensor("obj_23_pad_type_0"), val = tensor("valid")]; tensor obj_23_strides_0 = const()[name = tensor("obj_23_strides_0"), val = tensor([1, 1])]; tensor obj_23_pad_0 = const()[name = tensor("obj_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_23_dilations_0 = const()[name = tensor("obj_23_dilations_0"), val = tensor([1, 1])]; tensor obj_23_groups_0 = const()[name = tensor("obj_23_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80639616)))]; tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81819328)))]; tensor obj_23_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = obj_23_dilations_0, groups = obj_23_groups_0, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = obj_23_strides_0, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_23_cast_fp16")]; tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; tensor var_5277_to_fp16 = const()[name = tensor("op_5277_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_5277_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81820928)))]; tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81822528)))]; tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("valid")]; tensor input_45_strides_0 = const()[name = tensor("input_45_strides_0"), val = tensor([1, 1])]; tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_45_dilations_0 = const()[name = tensor("input_45_dilations_0"), val = tensor([1, 1])]; tensor input_45_groups_0 = const()[name = tensor("input_45_groups_0"), val = tensor(1)]; tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81824128)))]; tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86542784)))]; tensor input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor hidden_states_15_pad_type_0 = const()[name = tensor("hidden_states_15_pad_type_0"), val = tensor("valid")]; tensor hidden_states_15_strides_0 = const()[name = tensor("hidden_states_15_strides_0"), val = tensor([1, 1])]; tensor hidden_states_15_pad_0 = const()[name = tensor("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_15_dilations_0 = const()[name = tensor("hidden_states_15_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_15_groups_0 = const()[name = tensor("hidden_states_15_groups_0"), val = tensor(1)]; tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86548992)))]; tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91267648)))]; tensor hidden_states_15_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = layers_5_fc2_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; tensor var_5309 = const()[name = tensor("op_5309"), val = tensor(3)]; tensor var_5325 = const()[name = tensor("op_5325"), val = tensor(1)]; tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; tensor var_5342_to_fp16 = const()[name = tensor("op_5342_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_5342_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91269248)))]; tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91270848)))]; tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("valid")]; tensor query_13_strides_0 = const()[name = tensor("query_13_strides_0"), val = tensor([1, 1])]; tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_13_dilations_0 = const()[name = tensor("query_13_dilations_0"), val = tensor([1, 1])]; tensor query_13_groups_0 = const()[name = tensor("query_13_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91272448)))]; tensor layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92452160)))]; tensor query_13_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = query_13_dilations_0, groups = query_13_groups_0, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = query_13_strides_0, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("query_13_cast_fp16")]; tensor key_13_pad_type_0 = const()[name = tensor("key_13_pad_type_0"), val = tensor("valid")]; tensor key_13_strides_0 = const()[name = tensor("key_13_strides_0"), val = tensor([1, 1])]; tensor key_13_pad_0 = const()[name = tensor("key_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_13_dilations_0 = const()[name = tensor("key_13_dilations_0"), val = tensor([1, 1])]; tensor key_13_groups_0 = const()[name = tensor("key_13_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92453760)))]; tensor key_13_cast_fp16 = conv(dilations = key_13_dilations_0, groups = key_13_groups_0, pad = key_13_pad_0, pad_type = key_13_pad_type_0, strides = key_13_strides_0, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("key_13_cast_fp16")]; tensor value_13_pad_type_0 = const()[name = tensor("value_13_pad_type_0"), val = tensor("valid")]; tensor value_13_strides_0 = const()[name = tensor("value_13_strides_0"), val = tensor([1, 1])]; tensor value_13_pad_0 = const()[name = tensor("value_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_13_dilations_0 = const()[name = tensor("value_13_dilations_0"), val = tensor([1, 1])]; tensor value_13_groups_0 = const()[name = tensor("value_13_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93633472)))]; tensor layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94813184)))]; tensor value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = value_13_dilations_0, groups = value_13_groups_0, pad = value_13_pad_0, pad_type = value_13_pad_type_0, strides = value_13_strides_0, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("value_13_cast_fp16")]; tensor var_5377_begin_0 = const()[name = tensor("op_5377_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5377_end_0 = const()[name = tensor("op_5377_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5377_end_mask_0 = const()[name = tensor("op_5377_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5377_cast_fp16 = slice_by_index(begin = var_5377_begin_0, end = var_5377_end_0, end_mask = var_5377_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5377_cast_fp16")]; tensor var_5381_begin_0 = const()[name = tensor("op_5381_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_5381_end_0 = const()[name = tensor("op_5381_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_5381_end_mask_0 = const()[name = tensor("op_5381_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5381_cast_fp16 = slice_by_index(begin = var_5381_begin_0, end = var_5381_end_0, end_mask = var_5381_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5381_cast_fp16")]; tensor var_5385_begin_0 = const()[name = tensor("op_5385_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_5385_end_0 = const()[name = tensor("op_5385_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_5385_end_mask_0 = const()[name = tensor("op_5385_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5385_cast_fp16 = slice_by_index(begin = var_5385_begin_0, end = var_5385_end_0, end_mask = var_5385_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5385_cast_fp16")]; tensor var_5389_begin_0 = const()[name = tensor("op_5389_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_5389_end_0 = const()[name = tensor("op_5389_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_5389_end_mask_0 = const()[name = tensor("op_5389_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5389_cast_fp16 = slice_by_index(begin = var_5389_begin_0, end = var_5389_end_0, end_mask = var_5389_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5389_cast_fp16")]; tensor var_5393_begin_0 = const()[name = tensor("op_5393_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_5393_end_0 = const()[name = tensor("op_5393_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_5393_end_mask_0 = const()[name = tensor("op_5393_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5393_cast_fp16 = slice_by_index(begin = var_5393_begin_0, end = var_5393_end_0, end_mask = var_5393_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5393_cast_fp16")]; tensor var_5397_begin_0 = const()[name = tensor("op_5397_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5397_end_0 = const()[name = tensor("op_5397_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_5397_end_mask_0 = const()[name = tensor("op_5397_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5397_cast_fp16 = slice_by_index(begin = var_5397_begin_0, end = var_5397_end_0, end_mask = var_5397_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5397_cast_fp16")]; tensor var_5401_begin_0 = const()[name = tensor("op_5401_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_5401_end_0 = const()[name = tensor("op_5401_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_5401_end_mask_0 = const()[name = tensor("op_5401_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5401_cast_fp16 = slice_by_index(begin = var_5401_begin_0, end = var_5401_end_0, end_mask = var_5401_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5401_cast_fp16")]; tensor var_5405_begin_0 = const()[name = tensor("op_5405_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_5405_end_0 = const()[name = tensor("op_5405_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_5405_end_mask_0 = const()[name = tensor("op_5405_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5405_cast_fp16 = slice_by_index(begin = var_5405_begin_0, end = var_5405_end_0, end_mask = var_5405_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5405_cast_fp16")]; tensor var_5409_begin_0 = const()[name = tensor("op_5409_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_5409_end_0 = const()[name = tensor("op_5409_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_5409_end_mask_0 = const()[name = tensor("op_5409_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5409_cast_fp16 = slice_by_index(begin = var_5409_begin_0, end = var_5409_end_0, end_mask = var_5409_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5409_cast_fp16")]; tensor var_5413_begin_0 = const()[name = tensor("op_5413_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_5413_end_0 = const()[name = tensor("op_5413_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_5413_end_mask_0 = const()[name = tensor("op_5413_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5413_cast_fp16 = slice_by_index(begin = var_5413_begin_0, end = var_5413_end_0, end_mask = var_5413_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5413_cast_fp16")]; tensor var_5417_begin_0 = const()[name = tensor("op_5417_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5417_end_0 = const()[name = tensor("op_5417_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_5417_end_mask_0 = const()[name = tensor("op_5417_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5417_cast_fp16 = slice_by_index(begin = var_5417_begin_0, end = var_5417_end_0, end_mask = var_5417_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5417_cast_fp16")]; tensor var_5421_begin_0 = const()[name = tensor("op_5421_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_5421_end_0 = const()[name = tensor("op_5421_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_5421_end_mask_0 = const()[name = tensor("op_5421_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5421_cast_fp16 = slice_by_index(begin = var_5421_begin_0, end = var_5421_end_0, end_mask = var_5421_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_5421_cast_fp16")]; tensor var_5424_begin_0 = const()[name = tensor("op_5424_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5424_end_0 = const()[name = tensor("op_5424_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5424_end_mask_0 = const()[name = tensor("op_5424_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5424_cast_fp16 = slice_by_index(begin = var_5424_begin_0, end = var_5424_end_0, end_mask = var_5424_end_mask_0, x = var_5377_cast_fp16)[name = tensor("op_5424_cast_fp16")]; tensor var_5425_begin_0 = const()[name = tensor("op_5425_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5425_end_0 = const()[name = tensor("op_5425_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5425_end_mask_0 = const()[name = tensor("op_5425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5425_cast_fp16 = slice_by_index(begin = var_5425_begin_0, end = var_5425_end_0, end_mask = var_5425_end_mask_0, x = var_5377_cast_fp16)[name = tensor("op_5425_cast_fp16")]; tensor var_5426_begin_0 = const()[name = tensor("op_5426_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5426_end_0 = const()[name = tensor("op_5426_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5426_end_mask_0 = const()[name = tensor("op_5426_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5426_cast_fp16 = slice_by_index(begin = var_5426_begin_0, end = var_5426_end_0, end_mask = var_5426_end_mask_0, x = var_5377_cast_fp16)[name = tensor("op_5426_cast_fp16")]; tensor var_5427_begin_0 = const()[name = tensor("op_5427_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5427_end_0 = const()[name = tensor("op_5427_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5427_end_mask_0 = const()[name = tensor("op_5427_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5427_cast_fp16 = slice_by_index(begin = var_5427_begin_0, end = var_5427_end_0, end_mask = var_5427_end_mask_0, x = var_5377_cast_fp16)[name = tensor("op_5427_cast_fp16")]; tensor var_5428_begin_0 = const()[name = tensor("op_5428_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5428_end_0 = const()[name = tensor("op_5428_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5428_end_mask_0 = const()[name = tensor("op_5428_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5428_cast_fp16 = slice_by_index(begin = var_5428_begin_0, end = var_5428_end_0, end_mask = var_5428_end_mask_0, x = var_5377_cast_fp16)[name = tensor("op_5428_cast_fp16")]; tensor var_5429_begin_0 = const()[name = tensor("op_5429_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5429_end_0 = const()[name = tensor("op_5429_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5429_end_mask_0 = const()[name = tensor("op_5429_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5429_cast_fp16 = slice_by_index(begin = var_5429_begin_0, end = var_5429_end_0, end_mask = var_5429_end_mask_0, x = var_5377_cast_fp16)[name = tensor("op_5429_cast_fp16")]; tensor var_5430_begin_0 = const()[name = tensor("op_5430_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5430_end_0 = const()[name = tensor("op_5430_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5430_end_mask_0 = const()[name = tensor("op_5430_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5430_cast_fp16 = slice_by_index(begin = var_5430_begin_0, end = var_5430_end_0, end_mask = var_5430_end_mask_0, x = var_5381_cast_fp16)[name = tensor("op_5430_cast_fp16")]; tensor var_5431_begin_0 = const()[name = tensor("op_5431_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5431_end_0 = const()[name = tensor("op_5431_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5431_end_mask_0 = const()[name = tensor("op_5431_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5431_cast_fp16 = slice_by_index(begin = var_5431_begin_0, end = var_5431_end_0, end_mask = var_5431_end_mask_0, x = var_5381_cast_fp16)[name = tensor("op_5431_cast_fp16")]; tensor var_5432_begin_0 = const()[name = tensor("op_5432_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5432_end_0 = const()[name = tensor("op_5432_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5432_end_mask_0 = const()[name = tensor("op_5432_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5432_cast_fp16 = slice_by_index(begin = var_5432_begin_0, end = var_5432_end_0, end_mask = var_5432_end_mask_0, x = var_5381_cast_fp16)[name = tensor("op_5432_cast_fp16")]; tensor var_5433_begin_0 = const()[name = tensor("op_5433_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5433_end_0 = const()[name = tensor("op_5433_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5433_end_mask_0 = const()[name = tensor("op_5433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5433_cast_fp16 = slice_by_index(begin = var_5433_begin_0, end = var_5433_end_0, end_mask = var_5433_end_mask_0, x = var_5381_cast_fp16)[name = tensor("op_5433_cast_fp16")]; tensor var_5434_begin_0 = const()[name = tensor("op_5434_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5434_end_0 = const()[name = tensor("op_5434_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5434_end_mask_0 = const()[name = tensor("op_5434_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5434_cast_fp16 = slice_by_index(begin = var_5434_begin_0, end = var_5434_end_0, end_mask = var_5434_end_mask_0, x = var_5381_cast_fp16)[name = tensor("op_5434_cast_fp16")]; tensor var_5435_begin_0 = const()[name = tensor("op_5435_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5435_end_0 = const()[name = tensor("op_5435_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5435_end_mask_0 = const()[name = tensor("op_5435_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5435_cast_fp16 = slice_by_index(begin = var_5435_begin_0, end = var_5435_end_0, end_mask = var_5435_end_mask_0, x = var_5381_cast_fp16)[name = tensor("op_5435_cast_fp16")]; tensor var_5436_begin_0 = const()[name = tensor("op_5436_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5436_end_0 = const()[name = tensor("op_5436_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5436_end_mask_0 = const()[name = tensor("op_5436_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5436_cast_fp16 = slice_by_index(begin = var_5436_begin_0, end = var_5436_end_0, end_mask = var_5436_end_mask_0, x = var_5385_cast_fp16)[name = tensor("op_5436_cast_fp16")]; tensor var_5437_begin_0 = const()[name = tensor("op_5437_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5437_end_0 = const()[name = tensor("op_5437_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5437_end_mask_0 = const()[name = tensor("op_5437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5437_cast_fp16 = slice_by_index(begin = var_5437_begin_0, end = var_5437_end_0, end_mask = var_5437_end_mask_0, x = var_5385_cast_fp16)[name = tensor("op_5437_cast_fp16")]; tensor var_5438_begin_0 = const()[name = tensor("op_5438_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5438_end_0 = const()[name = tensor("op_5438_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5438_end_mask_0 = const()[name = tensor("op_5438_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5438_cast_fp16 = slice_by_index(begin = var_5438_begin_0, end = var_5438_end_0, end_mask = var_5438_end_mask_0, x = var_5385_cast_fp16)[name = tensor("op_5438_cast_fp16")]; tensor var_5439_begin_0 = const()[name = tensor("op_5439_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5439_end_0 = const()[name = tensor("op_5439_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5439_end_mask_0 = const()[name = tensor("op_5439_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5439_cast_fp16 = slice_by_index(begin = var_5439_begin_0, end = var_5439_end_0, end_mask = var_5439_end_mask_0, x = var_5385_cast_fp16)[name = tensor("op_5439_cast_fp16")]; tensor var_5440_begin_0 = const()[name = tensor("op_5440_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5440_end_0 = const()[name = tensor("op_5440_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5440_end_mask_0 = const()[name = tensor("op_5440_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5440_cast_fp16 = slice_by_index(begin = var_5440_begin_0, end = var_5440_end_0, end_mask = var_5440_end_mask_0, x = var_5385_cast_fp16)[name = tensor("op_5440_cast_fp16")]; tensor var_5441_begin_0 = const()[name = tensor("op_5441_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5441_end_0 = const()[name = tensor("op_5441_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5441_end_mask_0 = const()[name = tensor("op_5441_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5441_cast_fp16 = slice_by_index(begin = var_5441_begin_0, end = var_5441_end_0, end_mask = var_5441_end_mask_0, x = var_5385_cast_fp16)[name = tensor("op_5441_cast_fp16")]; tensor var_5442_begin_0 = const()[name = tensor("op_5442_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5442_end_0 = const()[name = tensor("op_5442_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5442_end_mask_0 = const()[name = tensor("op_5442_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5442_cast_fp16 = slice_by_index(begin = var_5442_begin_0, end = var_5442_end_0, end_mask = var_5442_end_mask_0, x = var_5389_cast_fp16)[name = tensor("op_5442_cast_fp16")]; tensor var_5443_begin_0 = const()[name = tensor("op_5443_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5443_end_0 = const()[name = tensor("op_5443_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5443_end_mask_0 = const()[name = tensor("op_5443_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5443_cast_fp16 = slice_by_index(begin = var_5443_begin_0, end = var_5443_end_0, end_mask = var_5443_end_mask_0, x = var_5389_cast_fp16)[name = tensor("op_5443_cast_fp16")]; tensor var_5444_begin_0 = const()[name = tensor("op_5444_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5444_end_0 = const()[name = tensor("op_5444_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5444_end_mask_0 = const()[name = tensor("op_5444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5444_cast_fp16 = slice_by_index(begin = var_5444_begin_0, end = var_5444_end_0, end_mask = var_5444_end_mask_0, x = var_5389_cast_fp16)[name = tensor("op_5444_cast_fp16")]; tensor var_5445_begin_0 = const()[name = tensor("op_5445_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5445_end_0 = const()[name = tensor("op_5445_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5445_end_mask_0 = const()[name = tensor("op_5445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5445_cast_fp16 = slice_by_index(begin = var_5445_begin_0, end = var_5445_end_0, end_mask = var_5445_end_mask_0, x = var_5389_cast_fp16)[name = tensor("op_5445_cast_fp16")]; tensor var_5446_begin_0 = const()[name = tensor("op_5446_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5446_end_0 = const()[name = tensor("op_5446_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5446_end_mask_0 = const()[name = tensor("op_5446_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5446_cast_fp16 = slice_by_index(begin = var_5446_begin_0, end = var_5446_end_0, end_mask = var_5446_end_mask_0, x = var_5389_cast_fp16)[name = tensor("op_5446_cast_fp16")]; tensor var_5447_begin_0 = const()[name = tensor("op_5447_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5447_end_0 = const()[name = tensor("op_5447_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5447_end_mask_0 = const()[name = tensor("op_5447_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5447_cast_fp16 = slice_by_index(begin = var_5447_begin_0, end = var_5447_end_0, end_mask = var_5447_end_mask_0, x = var_5389_cast_fp16)[name = tensor("op_5447_cast_fp16")]; tensor var_5448_begin_0 = const()[name = tensor("op_5448_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5448_end_0 = const()[name = tensor("op_5448_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5448_end_mask_0 = const()[name = tensor("op_5448_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5448_cast_fp16 = slice_by_index(begin = var_5448_begin_0, end = var_5448_end_0, end_mask = var_5448_end_mask_0, x = var_5393_cast_fp16)[name = tensor("op_5448_cast_fp16")]; tensor var_5449_begin_0 = const()[name = tensor("op_5449_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5449_end_0 = const()[name = tensor("op_5449_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5449_end_mask_0 = const()[name = tensor("op_5449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5449_cast_fp16 = slice_by_index(begin = var_5449_begin_0, end = var_5449_end_0, end_mask = var_5449_end_mask_0, x = var_5393_cast_fp16)[name = tensor("op_5449_cast_fp16")]; tensor var_5450_begin_0 = const()[name = tensor("op_5450_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5450_end_0 = const()[name = tensor("op_5450_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5450_end_mask_0 = const()[name = tensor("op_5450_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5450_cast_fp16 = slice_by_index(begin = var_5450_begin_0, end = var_5450_end_0, end_mask = var_5450_end_mask_0, x = var_5393_cast_fp16)[name = tensor("op_5450_cast_fp16")]; tensor var_5451_begin_0 = const()[name = tensor("op_5451_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5451_end_0 = const()[name = tensor("op_5451_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5451_end_mask_0 = const()[name = tensor("op_5451_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5451_cast_fp16 = slice_by_index(begin = var_5451_begin_0, end = var_5451_end_0, end_mask = var_5451_end_mask_0, x = var_5393_cast_fp16)[name = tensor("op_5451_cast_fp16")]; tensor var_5452_begin_0 = const()[name = tensor("op_5452_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5452_end_0 = const()[name = tensor("op_5452_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5452_end_mask_0 = const()[name = tensor("op_5452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5452_cast_fp16 = slice_by_index(begin = var_5452_begin_0, end = var_5452_end_0, end_mask = var_5452_end_mask_0, x = var_5393_cast_fp16)[name = tensor("op_5452_cast_fp16")]; tensor var_5453_begin_0 = const()[name = tensor("op_5453_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5453_end_0 = const()[name = tensor("op_5453_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5453_end_mask_0 = const()[name = tensor("op_5453_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5453_cast_fp16 = slice_by_index(begin = var_5453_begin_0, end = var_5453_end_0, end_mask = var_5453_end_mask_0, x = var_5393_cast_fp16)[name = tensor("op_5453_cast_fp16")]; tensor var_5454_begin_0 = const()[name = tensor("op_5454_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5454_end_0 = const()[name = tensor("op_5454_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5454_end_mask_0 = const()[name = tensor("op_5454_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5454_cast_fp16 = slice_by_index(begin = var_5454_begin_0, end = var_5454_end_0, end_mask = var_5454_end_mask_0, x = var_5397_cast_fp16)[name = tensor("op_5454_cast_fp16")]; tensor var_5455_begin_0 = const()[name = tensor("op_5455_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5455_end_0 = const()[name = tensor("op_5455_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5455_end_mask_0 = const()[name = tensor("op_5455_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5455_cast_fp16 = slice_by_index(begin = var_5455_begin_0, end = var_5455_end_0, end_mask = var_5455_end_mask_0, x = var_5397_cast_fp16)[name = tensor("op_5455_cast_fp16")]; tensor var_5456_begin_0 = const()[name = tensor("op_5456_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5456_end_0 = const()[name = tensor("op_5456_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5456_end_mask_0 = const()[name = tensor("op_5456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5456_cast_fp16 = slice_by_index(begin = var_5456_begin_0, end = var_5456_end_0, end_mask = var_5456_end_mask_0, x = var_5397_cast_fp16)[name = tensor("op_5456_cast_fp16")]; tensor var_5457_begin_0 = const()[name = tensor("op_5457_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5457_end_0 = const()[name = tensor("op_5457_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5457_end_mask_0 = const()[name = tensor("op_5457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5457_cast_fp16 = slice_by_index(begin = var_5457_begin_0, end = var_5457_end_0, end_mask = var_5457_end_mask_0, x = var_5397_cast_fp16)[name = tensor("op_5457_cast_fp16")]; tensor var_5458_begin_0 = const()[name = tensor("op_5458_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5458_end_0 = const()[name = tensor("op_5458_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5458_end_mask_0 = const()[name = tensor("op_5458_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5458_cast_fp16 = slice_by_index(begin = var_5458_begin_0, end = var_5458_end_0, end_mask = var_5458_end_mask_0, x = var_5397_cast_fp16)[name = tensor("op_5458_cast_fp16")]; tensor var_5459_begin_0 = const()[name = tensor("op_5459_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5459_end_0 = const()[name = tensor("op_5459_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5459_end_mask_0 = const()[name = tensor("op_5459_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5459_cast_fp16 = slice_by_index(begin = var_5459_begin_0, end = var_5459_end_0, end_mask = var_5459_end_mask_0, x = var_5397_cast_fp16)[name = tensor("op_5459_cast_fp16")]; tensor var_5460_begin_0 = const()[name = tensor("op_5460_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5460_end_0 = const()[name = tensor("op_5460_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5460_end_mask_0 = const()[name = tensor("op_5460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5460_cast_fp16 = slice_by_index(begin = var_5460_begin_0, end = var_5460_end_0, end_mask = var_5460_end_mask_0, x = var_5401_cast_fp16)[name = tensor("op_5460_cast_fp16")]; tensor var_5461_begin_0 = const()[name = tensor("op_5461_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5461_end_0 = const()[name = tensor("op_5461_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5461_end_mask_0 = const()[name = tensor("op_5461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5461_cast_fp16 = slice_by_index(begin = var_5461_begin_0, end = var_5461_end_0, end_mask = var_5461_end_mask_0, x = var_5401_cast_fp16)[name = tensor("op_5461_cast_fp16")]; tensor var_5462_begin_0 = const()[name = tensor("op_5462_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5462_end_0 = const()[name = tensor("op_5462_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5462_end_mask_0 = const()[name = tensor("op_5462_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5462_cast_fp16 = slice_by_index(begin = var_5462_begin_0, end = var_5462_end_0, end_mask = var_5462_end_mask_0, x = var_5401_cast_fp16)[name = tensor("op_5462_cast_fp16")]; tensor var_5463_begin_0 = const()[name = tensor("op_5463_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5463_end_0 = const()[name = tensor("op_5463_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5463_end_mask_0 = const()[name = tensor("op_5463_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5463_cast_fp16 = slice_by_index(begin = var_5463_begin_0, end = var_5463_end_0, end_mask = var_5463_end_mask_0, x = var_5401_cast_fp16)[name = tensor("op_5463_cast_fp16")]; tensor var_5464_begin_0 = const()[name = tensor("op_5464_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5464_end_0 = const()[name = tensor("op_5464_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5464_end_mask_0 = const()[name = tensor("op_5464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5464_cast_fp16 = slice_by_index(begin = var_5464_begin_0, end = var_5464_end_0, end_mask = var_5464_end_mask_0, x = var_5401_cast_fp16)[name = tensor("op_5464_cast_fp16")]; tensor var_5465_begin_0 = const()[name = tensor("op_5465_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5465_end_0 = const()[name = tensor("op_5465_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5465_end_mask_0 = const()[name = tensor("op_5465_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5465_cast_fp16 = slice_by_index(begin = var_5465_begin_0, end = var_5465_end_0, end_mask = var_5465_end_mask_0, x = var_5401_cast_fp16)[name = tensor("op_5465_cast_fp16")]; tensor var_5466_begin_0 = const()[name = tensor("op_5466_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5466_end_0 = const()[name = tensor("op_5466_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5466_end_mask_0 = const()[name = tensor("op_5466_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5466_cast_fp16 = slice_by_index(begin = var_5466_begin_0, end = var_5466_end_0, end_mask = var_5466_end_mask_0, x = var_5405_cast_fp16)[name = tensor("op_5466_cast_fp16")]; tensor var_5467_begin_0 = const()[name = tensor("op_5467_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5467_end_0 = const()[name = tensor("op_5467_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5467_end_mask_0 = const()[name = tensor("op_5467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5467_cast_fp16 = slice_by_index(begin = var_5467_begin_0, end = var_5467_end_0, end_mask = var_5467_end_mask_0, x = var_5405_cast_fp16)[name = tensor("op_5467_cast_fp16")]; tensor var_5468_begin_0 = const()[name = tensor("op_5468_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5468_end_0 = const()[name = tensor("op_5468_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5468_end_mask_0 = const()[name = tensor("op_5468_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5468_cast_fp16 = slice_by_index(begin = var_5468_begin_0, end = var_5468_end_0, end_mask = var_5468_end_mask_0, x = var_5405_cast_fp16)[name = tensor("op_5468_cast_fp16")]; tensor var_5469_begin_0 = const()[name = tensor("op_5469_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5469_end_0 = const()[name = tensor("op_5469_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5469_end_mask_0 = const()[name = tensor("op_5469_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5469_cast_fp16 = slice_by_index(begin = var_5469_begin_0, end = var_5469_end_0, end_mask = var_5469_end_mask_0, x = var_5405_cast_fp16)[name = tensor("op_5469_cast_fp16")]; tensor var_5470_begin_0 = const()[name = tensor("op_5470_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5470_end_0 = const()[name = tensor("op_5470_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5470_end_mask_0 = const()[name = tensor("op_5470_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5470_cast_fp16 = slice_by_index(begin = var_5470_begin_0, end = var_5470_end_0, end_mask = var_5470_end_mask_0, x = var_5405_cast_fp16)[name = tensor("op_5470_cast_fp16")]; tensor var_5471_begin_0 = const()[name = tensor("op_5471_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5471_end_0 = const()[name = tensor("op_5471_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5471_end_mask_0 = const()[name = tensor("op_5471_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5471_cast_fp16 = slice_by_index(begin = var_5471_begin_0, end = var_5471_end_0, end_mask = var_5471_end_mask_0, x = var_5405_cast_fp16)[name = tensor("op_5471_cast_fp16")]; tensor var_5472_begin_0 = const()[name = tensor("op_5472_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5472_end_0 = const()[name = tensor("op_5472_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5472_end_mask_0 = const()[name = tensor("op_5472_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5472_cast_fp16 = slice_by_index(begin = var_5472_begin_0, end = var_5472_end_0, end_mask = var_5472_end_mask_0, x = var_5409_cast_fp16)[name = tensor("op_5472_cast_fp16")]; tensor var_5473_begin_0 = const()[name = tensor("op_5473_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5473_end_0 = const()[name = tensor("op_5473_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5473_end_mask_0 = const()[name = tensor("op_5473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5473_cast_fp16 = slice_by_index(begin = var_5473_begin_0, end = var_5473_end_0, end_mask = var_5473_end_mask_0, x = var_5409_cast_fp16)[name = tensor("op_5473_cast_fp16")]; tensor var_5474_begin_0 = const()[name = tensor("op_5474_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5474_end_0 = const()[name = tensor("op_5474_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5474_end_mask_0 = const()[name = tensor("op_5474_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5474_cast_fp16 = slice_by_index(begin = var_5474_begin_0, end = var_5474_end_0, end_mask = var_5474_end_mask_0, x = var_5409_cast_fp16)[name = tensor("op_5474_cast_fp16")]; tensor var_5475_begin_0 = const()[name = tensor("op_5475_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5475_end_0 = const()[name = tensor("op_5475_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5475_end_mask_0 = const()[name = tensor("op_5475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5475_cast_fp16 = slice_by_index(begin = var_5475_begin_0, end = var_5475_end_0, end_mask = var_5475_end_mask_0, x = var_5409_cast_fp16)[name = tensor("op_5475_cast_fp16")]; tensor var_5476_begin_0 = const()[name = tensor("op_5476_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5476_end_0 = const()[name = tensor("op_5476_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5476_end_mask_0 = const()[name = tensor("op_5476_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5476_cast_fp16 = slice_by_index(begin = var_5476_begin_0, end = var_5476_end_0, end_mask = var_5476_end_mask_0, x = var_5409_cast_fp16)[name = tensor("op_5476_cast_fp16")]; tensor var_5477_begin_0 = const()[name = tensor("op_5477_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5477_end_0 = const()[name = tensor("op_5477_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5477_end_mask_0 = const()[name = tensor("op_5477_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5477_cast_fp16 = slice_by_index(begin = var_5477_begin_0, end = var_5477_end_0, end_mask = var_5477_end_mask_0, x = var_5409_cast_fp16)[name = tensor("op_5477_cast_fp16")]; tensor var_5478_begin_0 = const()[name = tensor("op_5478_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5478_end_0 = const()[name = tensor("op_5478_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5478_end_mask_0 = const()[name = tensor("op_5478_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5478_cast_fp16 = slice_by_index(begin = var_5478_begin_0, end = var_5478_end_0, end_mask = var_5478_end_mask_0, x = var_5413_cast_fp16)[name = tensor("op_5478_cast_fp16")]; tensor var_5479_begin_0 = const()[name = tensor("op_5479_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5479_end_0 = const()[name = tensor("op_5479_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5479_end_mask_0 = const()[name = tensor("op_5479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5479_cast_fp16 = slice_by_index(begin = var_5479_begin_0, end = var_5479_end_0, end_mask = var_5479_end_mask_0, x = var_5413_cast_fp16)[name = tensor("op_5479_cast_fp16")]; tensor var_5480_begin_0 = const()[name = tensor("op_5480_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5480_end_0 = const()[name = tensor("op_5480_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5480_end_mask_0 = const()[name = tensor("op_5480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5480_cast_fp16 = slice_by_index(begin = var_5480_begin_0, end = var_5480_end_0, end_mask = var_5480_end_mask_0, x = var_5413_cast_fp16)[name = tensor("op_5480_cast_fp16")]; tensor var_5481_begin_0 = const()[name = tensor("op_5481_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5481_end_0 = const()[name = tensor("op_5481_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5481_end_mask_0 = const()[name = tensor("op_5481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5481_cast_fp16 = slice_by_index(begin = var_5481_begin_0, end = var_5481_end_0, end_mask = var_5481_end_mask_0, x = var_5413_cast_fp16)[name = tensor("op_5481_cast_fp16")]; tensor var_5482_begin_0 = const()[name = tensor("op_5482_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5482_end_0 = const()[name = tensor("op_5482_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5482_end_mask_0 = const()[name = tensor("op_5482_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5482_cast_fp16 = slice_by_index(begin = var_5482_begin_0, end = var_5482_end_0, end_mask = var_5482_end_mask_0, x = var_5413_cast_fp16)[name = tensor("op_5482_cast_fp16")]; tensor var_5483_begin_0 = const()[name = tensor("op_5483_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5483_end_0 = const()[name = tensor("op_5483_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5483_end_mask_0 = const()[name = tensor("op_5483_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5483_cast_fp16 = slice_by_index(begin = var_5483_begin_0, end = var_5483_end_0, end_mask = var_5483_end_mask_0, x = var_5413_cast_fp16)[name = tensor("op_5483_cast_fp16")]; tensor var_5484_begin_0 = const()[name = tensor("op_5484_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5484_end_0 = const()[name = tensor("op_5484_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5484_end_mask_0 = const()[name = tensor("op_5484_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5484_cast_fp16 = slice_by_index(begin = var_5484_begin_0, end = var_5484_end_0, end_mask = var_5484_end_mask_0, x = var_5417_cast_fp16)[name = tensor("op_5484_cast_fp16")]; tensor var_5485_begin_0 = const()[name = tensor("op_5485_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5485_end_0 = const()[name = tensor("op_5485_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5485_end_mask_0 = const()[name = tensor("op_5485_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5485_cast_fp16 = slice_by_index(begin = var_5485_begin_0, end = var_5485_end_0, end_mask = var_5485_end_mask_0, x = var_5417_cast_fp16)[name = tensor("op_5485_cast_fp16")]; tensor var_5486_begin_0 = const()[name = tensor("op_5486_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5486_end_0 = const()[name = tensor("op_5486_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5486_end_mask_0 = const()[name = tensor("op_5486_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5486_cast_fp16 = slice_by_index(begin = var_5486_begin_0, end = var_5486_end_0, end_mask = var_5486_end_mask_0, x = var_5417_cast_fp16)[name = tensor("op_5486_cast_fp16")]; tensor var_5487_begin_0 = const()[name = tensor("op_5487_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5487_end_0 = const()[name = tensor("op_5487_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5487_end_mask_0 = const()[name = tensor("op_5487_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5487_cast_fp16 = slice_by_index(begin = var_5487_begin_0, end = var_5487_end_0, end_mask = var_5487_end_mask_0, x = var_5417_cast_fp16)[name = tensor("op_5487_cast_fp16")]; tensor var_5488_begin_0 = const()[name = tensor("op_5488_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5488_end_0 = const()[name = tensor("op_5488_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5488_end_mask_0 = const()[name = tensor("op_5488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5488_cast_fp16 = slice_by_index(begin = var_5488_begin_0, end = var_5488_end_0, end_mask = var_5488_end_mask_0, x = var_5417_cast_fp16)[name = tensor("op_5488_cast_fp16")]; tensor var_5489_begin_0 = const()[name = tensor("op_5489_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5489_end_0 = const()[name = tensor("op_5489_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5489_end_mask_0 = const()[name = tensor("op_5489_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5489_cast_fp16 = slice_by_index(begin = var_5489_begin_0, end = var_5489_end_0, end_mask = var_5489_end_mask_0, x = var_5417_cast_fp16)[name = tensor("op_5489_cast_fp16")]; tensor var_5490_begin_0 = const()[name = tensor("op_5490_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5490_end_0 = const()[name = tensor("op_5490_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_5490_end_mask_0 = const()[name = tensor("op_5490_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5490_cast_fp16 = slice_by_index(begin = var_5490_begin_0, end = var_5490_end_0, end_mask = var_5490_end_mask_0, x = var_5421_cast_fp16)[name = tensor("op_5490_cast_fp16")]; tensor var_5491_begin_0 = const()[name = tensor("op_5491_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5491_end_0 = const()[name = tensor("op_5491_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_5491_end_mask_0 = const()[name = tensor("op_5491_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5491_cast_fp16 = slice_by_index(begin = var_5491_begin_0, end = var_5491_end_0, end_mask = var_5491_end_mask_0, x = var_5421_cast_fp16)[name = tensor("op_5491_cast_fp16")]; tensor var_5492_begin_0 = const()[name = tensor("op_5492_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5492_end_0 = const()[name = tensor("op_5492_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_5492_end_mask_0 = const()[name = tensor("op_5492_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5492_cast_fp16 = slice_by_index(begin = var_5492_begin_0, end = var_5492_end_0, end_mask = var_5492_end_mask_0, x = var_5421_cast_fp16)[name = tensor("op_5492_cast_fp16")]; tensor var_5493_begin_0 = const()[name = tensor("op_5493_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5493_end_0 = const()[name = tensor("op_5493_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_5493_end_mask_0 = const()[name = tensor("op_5493_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5493_cast_fp16 = slice_by_index(begin = var_5493_begin_0, end = var_5493_end_0, end_mask = var_5493_end_mask_0, x = var_5421_cast_fp16)[name = tensor("op_5493_cast_fp16")]; tensor var_5494_begin_0 = const()[name = tensor("op_5494_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5494_end_0 = const()[name = tensor("op_5494_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_5494_end_mask_0 = const()[name = tensor("op_5494_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5494_cast_fp16 = slice_by_index(begin = var_5494_begin_0, end = var_5494_end_0, end_mask = var_5494_end_mask_0, x = var_5421_cast_fp16)[name = tensor("op_5494_cast_fp16")]; tensor var_5495_begin_0 = const()[name = tensor("op_5495_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_5495_end_0 = const()[name = tensor("op_5495_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_5495_end_mask_0 = const()[name = tensor("op_5495_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5495_cast_fp16 = slice_by_index(begin = var_5495_begin_0, end = var_5495_end_0, end_mask = var_5495_end_mask_0, x = var_5421_cast_fp16)[name = tensor("op_5495_cast_fp16")]; tensor k_13_perm_0 = const()[name = tensor("k_13_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_5500_begin_0 = const()[name = tensor("op_5500_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5500_end_0 = const()[name = tensor("op_5500_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_5500_end_mask_0 = const()[name = tensor("op_5500_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = key_13_cast_fp16)[name = tensor("transpose_5")]; tensor var_5500_cast_fp16 = slice_by_index(begin = var_5500_begin_0, end = var_5500_end_0, end_mask = var_5500_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5500_cast_fp16")]; tensor var_5504_begin_0 = const()[name = tensor("op_5504_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_5504_end_0 = const()[name = tensor("op_5504_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_5504_end_mask_0 = const()[name = tensor("op_5504_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5504_cast_fp16 = slice_by_index(begin = var_5504_begin_0, end = var_5504_end_0, end_mask = var_5504_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5504_cast_fp16")]; tensor var_5508_begin_0 = const()[name = tensor("op_5508_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_5508_end_0 = const()[name = tensor("op_5508_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_5508_end_mask_0 = const()[name = tensor("op_5508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5508_cast_fp16 = slice_by_index(begin = var_5508_begin_0, end = var_5508_end_0, end_mask = var_5508_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5508_cast_fp16")]; tensor var_5512_begin_0 = const()[name = tensor("op_5512_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_5512_end_0 = const()[name = tensor("op_5512_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_5512_end_mask_0 = const()[name = tensor("op_5512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5512_cast_fp16 = slice_by_index(begin = var_5512_begin_0, end = var_5512_end_0, end_mask = var_5512_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5512_cast_fp16")]; tensor var_5516_begin_0 = const()[name = tensor("op_5516_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5516_end_0 = const()[name = tensor("op_5516_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_5516_end_mask_0 = const()[name = tensor("op_5516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5516_cast_fp16 = slice_by_index(begin = var_5516_begin_0, end = var_5516_end_0, end_mask = var_5516_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5516_cast_fp16")]; tensor var_5520_begin_0 = const()[name = tensor("op_5520_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_5520_end_0 = const()[name = tensor("op_5520_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_5520_end_mask_0 = const()[name = tensor("op_5520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5520_cast_fp16 = slice_by_index(begin = var_5520_begin_0, end = var_5520_end_0, end_mask = var_5520_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5520_cast_fp16")]; tensor var_5524_begin_0 = const()[name = tensor("op_5524_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_5524_end_0 = const()[name = tensor("op_5524_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_5524_end_mask_0 = const()[name = tensor("op_5524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5524_cast_fp16 = slice_by_index(begin = var_5524_begin_0, end = var_5524_end_0, end_mask = var_5524_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5524_cast_fp16")]; tensor var_5528_begin_0 = const()[name = tensor("op_5528_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_5528_end_0 = const()[name = tensor("op_5528_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_5528_end_mask_0 = const()[name = tensor("op_5528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5528_cast_fp16 = slice_by_index(begin = var_5528_begin_0, end = var_5528_end_0, end_mask = var_5528_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5528_cast_fp16")]; tensor var_5532_begin_0 = const()[name = tensor("op_5532_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5532_end_0 = const()[name = tensor("op_5532_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_5532_end_mask_0 = const()[name = tensor("op_5532_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5532_cast_fp16 = slice_by_index(begin = var_5532_begin_0, end = var_5532_end_0, end_mask = var_5532_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5532_cast_fp16")]; tensor var_5536_begin_0 = const()[name = tensor("op_5536_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_5536_end_0 = const()[name = tensor("op_5536_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_5536_end_mask_0 = const()[name = tensor("op_5536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5536_cast_fp16 = slice_by_index(begin = var_5536_begin_0, end = var_5536_end_0, end_mask = var_5536_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5536_cast_fp16")]; tensor var_5540_begin_0 = const()[name = tensor("op_5540_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_5540_end_0 = const()[name = tensor("op_5540_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_5540_end_mask_0 = const()[name = tensor("op_5540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5540_cast_fp16 = slice_by_index(begin = var_5540_begin_0, end = var_5540_end_0, end_mask = var_5540_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5540_cast_fp16")]; tensor var_5544_begin_0 = const()[name = tensor("op_5544_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_5544_end_0 = const()[name = tensor("op_5544_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_5544_end_mask_0 = const()[name = tensor("op_5544_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5544_cast_fp16 = slice_by_index(begin = var_5544_begin_0, end = var_5544_end_0, end_mask = var_5544_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_5544_cast_fp16")]; tensor var_5546_begin_0 = const()[name = tensor("op_5546_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5546_end_0 = const()[name = tensor("op_5546_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5546_end_mask_0 = const()[name = tensor("op_5546_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5546_cast_fp16 = slice_by_index(begin = var_5546_begin_0, end = var_5546_end_0, end_mask = var_5546_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5546_cast_fp16")]; tensor var_5550_begin_0 = const()[name = tensor("op_5550_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_5550_end_0 = const()[name = tensor("op_5550_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_5550_end_mask_0 = const()[name = tensor("op_5550_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5550_cast_fp16 = slice_by_index(begin = var_5550_begin_0, end = var_5550_end_0, end_mask = var_5550_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5550_cast_fp16")]; tensor var_5554_begin_0 = const()[name = tensor("op_5554_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_5554_end_0 = const()[name = tensor("op_5554_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_5554_end_mask_0 = const()[name = tensor("op_5554_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5554_cast_fp16 = slice_by_index(begin = var_5554_begin_0, end = var_5554_end_0, end_mask = var_5554_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5554_cast_fp16")]; tensor var_5558_begin_0 = const()[name = tensor("op_5558_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_5558_end_0 = const()[name = tensor("op_5558_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_5558_end_mask_0 = const()[name = tensor("op_5558_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5558_cast_fp16 = slice_by_index(begin = var_5558_begin_0, end = var_5558_end_0, end_mask = var_5558_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5558_cast_fp16")]; tensor var_5562_begin_0 = const()[name = tensor("op_5562_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_5562_end_0 = const()[name = tensor("op_5562_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_5562_end_mask_0 = const()[name = tensor("op_5562_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5562_cast_fp16 = slice_by_index(begin = var_5562_begin_0, end = var_5562_end_0, end_mask = var_5562_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5562_cast_fp16")]; tensor var_5566_begin_0 = const()[name = tensor("op_5566_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5566_end_0 = const()[name = tensor("op_5566_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_5566_end_mask_0 = const()[name = tensor("op_5566_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5566_cast_fp16 = slice_by_index(begin = var_5566_begin_0, end = var_5566_end_0, end_mask = var_5566_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5566_cast_fp16")]; tensor var_5570_begin_0 = const()[name = tensor("op_5570_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_5570_end_0 = const()[name = tensor("op_5570_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_5570_end_mask_0 = const()[name = tensor("op_5570_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5570_cast_fp16 = slice_by_index(begin = var_5570_begin_0, end = var_5570_end_0, end_mask = var_5570_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5570_cast_fp16")]; tensor var_5574_begin_0 = const()[name = tensor("op_5574_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_5574_end_0 = const()[name = tensor("op_5574_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_5574_end_mask_0 = const()[name = tensor("op_5574_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5574_cast_fp16 = slice_by_index(begin = var_5574_begin_0, end = var_5574_end_0, end_mask = var_5574_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5574_cast_fp16")]; tensor var_5578_begin_0 = const()[name = tensor("op_5578_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_5578_end_0 = const()[name = tensor("op_5578_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_5578_end_mask_0 = const()[name = tensor("op_5578_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5578_cast_fp16 = slice_by_index(begin = var_5578_begin_0, end = var_5578_end_0, end_mask = var_5578_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5578_cast_fp16")]; tensor var_5582_begin_0 = const()[name = tensor("op_5582_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_5582_end_0 = const()[name = tensor("op_5582_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_5582_end_mask_0 = const()[name = tensor("op_5582_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5582_cast_fp16 = slice_by_index(begin = var_5582_begin_0, end = var_5582_end_0, end_mask = var_5582_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5582_cast_fp16")]; tensor var_5586_begin_0 = const()[name = tensor("op_5586_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5586_end_0 = const()[name = tensor("op_5586_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_5586_end_mask_0 = const()[name = tensor("op_5586_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5586_cast_fp16 = slice_by_index(begin = var_5586_begin_0, end = var_5586_end_0, end_mask = var_5586_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5586_cast_fp16")]; tensor var_5590_begin_0 = const()[name = tensor("op_5590_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_5590_end_0 = const()[name = tensor("op_5590_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_5590_end_mask_0 = const()[name = tensor("op_5590_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_5590_cast_fp16 = slice_by_index(begin = var_5590_begin_0, end = var_5590_end_0, end_mask = var_5590_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_5590_cast_fp16")]; tensor _SplitHeadsQ__mh_w_865_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_865_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_865_equation_0, values = (var_5500_cast_fp16, var_5424_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_865_cast_fp16")]; tensor _SplitHeadsQ__mh_w_867_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_867_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_867_equation_0, values = (var_5500_cast_fp16, var_5425_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_867_cast_fp16")]; tensor _SplitHeadsQ__mh_w_869_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_869_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_869_equation_0, values = (var_5500_cast_fp16, var_5426_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_869_cast_fp16")]; tensor _SplitHeadsQ__mh_w_871_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_871_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_871_equation_0, values = (var_5500_cast_fp16, var_5427_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_871_cast_fp16")]; tensor _SplitHeadsQ__mh_w_873_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_873_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_873_equation_0, values = (var_5500_cast_fp16, var_5428_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_873_cast_fp16")]; tensor _SplitHeadsQ__mh_w_875_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_875_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_875_equation_0, values = (var_5500_cast_fp16, var_5429_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_875_cast_fp16")]; tensor _SplitHeadsQ__mh_w_877_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_877_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_877_equation_0, values = (var_5504_cast_fp16, var_5430_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_877_cast_fp16")]; tensor _SplitHeadsQ__mh_w_879_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_879_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_879_equation_0, values = (var_5504_cast_fp16, var_5431_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_879_cast_fp16")]; tensor _SplitHeadsQ__mh_w_881_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_881_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_881_equation_0, values = (var_5504_cast_fp16, var_5432_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_881_cast_fp16")]; tensor _SplitHeadsQ__mh_w_883_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_883_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_883_equation_0, values = (var_5504_cast_fp16, var_5433_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_883_cast_fp16")]; tensor _SplitHeadsQ__mh_w_885_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_885_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_885_equation_0, values = (var_5504_cast_fp16, var_5434_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_885_cast_fp16")]; tensor _SplitHeadsQ__mh_w_887_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_887_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_887_equation_0, values = (var_5504_cast_fp16, var_5435_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_887_cast_fp16")]; tensor _SplitHeadsQ__mh_w_889_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_889_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_889_equation_0, values = (var_5508_cast_fp16, var_5436_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_889_cast_fp16")]; tensor _SplitHeadsQ__mh_w_891_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_891_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_891_equation_0, values = (var_5508_cast_fp16, var_5437_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_893_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_893_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_893_equation_0, values = (var_5508_cast_fp16, var_5438_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_893_cast_fp16")]; tensor _SplitHeadsQ__mh_w_895_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_895_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_895_equation_0, values = (var_5508_cast_fp16, var_5439_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_895_cast_fp16")]; tensor _SplitHeadsQ__mh_w_897_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_897_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_897_equation_0, values = (var_5508_cast_fp16, var_5440_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_897_cast_fp16")]; tensor _SplitHeadsQ__mh_w_899_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_899_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_899_equation_0, values = (var_5508_cast_fp16, var_5441_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_899_cast_fp16")]; tensor _SplitHeadsQ__mh_w_901_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_901_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_901_equation_0, values = (var_5512_cast_fp16, var_5442_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_901_cast_fp16")]; tensor _SplitHeadsQ__mh_w_903_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_903_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_903_equation_0, values = (var_5512_cast_fp16, var_5443_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_903_cast_fp16")]; tensor _SplitHeadsQ__mh_w_905_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_905_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_905_equation_0, values = (var_5512_cast_fp16, var_5444_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_905_cast_fp16")]; tensor _SplitHeadsQ__mh_w_907_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_907_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_907_equation_0, values = (var_5512_cast_fp16, var_5445_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_907_cast_fp16")]; tensor _SplitHeadsQ__mh_w_909_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_909_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_909_equation_0, values = (var_5512_cast_fp16, var_5446_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_909_cast_fp16")]; tensor _SplitHeadsQ__mh_w_911_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_911_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_911_equation_0, values = (var_5512_cast_fp16, var_5447_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_911_cast_fp16")]; tensor _SplitHeadsQ__mh_w_913_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_913_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_913_equation_0, values = (var_5516_cast_fp16, var_5448_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_913_cast_fp16")]; tensor _SplitHeadsQ__mh_w_915_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_915_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_915_equation_0, values = (var_5516_cast_fp16, var_5449_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_915_cast_fp16")]; tensor _SplitHeadsQ__mh_w_917_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_917_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_917_equation_0, values = (var_5516_cast_fp16, var_5450_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_917_cast_fp16")]; tensor _SplitHeadsQ__mh_w_919_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_919_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_919_equation_0, values = (var_5516_cast_fp16, var_5451_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_919_cast_fp16")]; tensor _SplitHeadsQ__mh_w_921_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_921_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_921_equation_0, values = (var_5516_cast_fp16, var_5452_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_921_cast_fp16")]; tensor _SplitHeadsQ__mh_w_923_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_923_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_923_equation_0, values = (var_5516_cast_fp16, var_5453_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_923_cast_fp16")]; tensor _SplitHeadsQ__mh_w_925_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_925_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_925_equation_0, values = (var_5520_cast_fp16, var_5454_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_925_cast_fp16")]; tensor _SplitHeadsQ__mh_w_927_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_927_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_927_equation_0, values = (var_5520_cast_fp16, var_5455_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_927_cast_fp16")]; tensor _SplitHeadsQ__mh_w_929_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_929_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_929_equation_0, values = (var_5520_cast_fp16, var_5456_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_929_cast_fp16")]; tensor _SplitHeadsQ__mh_w_931_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_931_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_931_equation_0, values = (var_5520_cast_fp16, var_5457_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_931_cast_fp16")]; tensor _SplitHeadsQ__mh_w_933_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_933_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_933_equation_0, values = (var_5520_cast_fp16, var_5458_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_933_cast_fp16")]; tensor _SplitHeadsQ__mh_w_935_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_935_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_935_equation_0, values = (var_5520_cast_fp16, var_5459_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_935_cast_fp16")]; tensor _SplitHeadsQ__mh_w_937_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_937_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_937_equation_0, values = (var_5524_cast_fp16, var_5460_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_937_cast_fp16")]; tensor _SplitHeadsQ__mh_w_939_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_939_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_939_equation_0, values = (var_5524_cast_fp16, var_5461_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_939_cast_fp16")]; tensor _SplitHeadsQ__mh_w_941_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_941_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_941_equation_0, values = (var_5524_cast_fp16, var_5462_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_941_cast_fp16")]; tensor _SplitHeadsQ__mh_w_943_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_943_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_943_equation_0, values = (var_5524_cast_fp16, var_5463_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_943_cast_fp16")]; tensor _SplitHeadsQ__mh_w_945_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_945_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_945_equation_0, values = (var_5524_cast_fp16, var_5464_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_945_cast_fp16")]; tensor _SplitHeadsQ__mh_w_947_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_947_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_947_equation_0, values = (var_5524_cast_fp16, var_5465_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_947_cast_fp16")]; tensor _SplitHeadsQ__mh_w_949_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_949_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_949_equation_0, values = (var_5528_cast_fp16, var_5466_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_949_cast_fp16")]; tensor _SplitHeadsQ__mh_w_951_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_951_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_951_equation_0, values = (var_5528_cast_fp16, var_5467_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_951_cast_fp16")]; tensor _SplitHeadsQ__mh_w_953_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_953_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_953_equation_0, values = (var_5528_cast_fp16, var_5468_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_953_cast_fp16")]; tensor _SplitHeadsQ__mh_w_955_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_955_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_955_equation_0, values = (var_5528_cast_fp16, var_5469_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_955_cast_fp16")]; tensor _SplitHeadsQ__mh_w_957_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_957_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_957_equation_0, values = (var_5528_cast_fp16, var_5470_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_957_cast_fp16")]; tensor _SplitHeadsQ__mh_w_959_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_959_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_959_equation_0, values = (var_5528_cast_fp16, var_5471_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_959_cast_fp16")]; tensor _SplitHeadsQ__mh_w_961_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_961_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_961_equation_0, values = (var_5532_cast_fp16, var_5472_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_961_cast_fp16")]; tensor _SplitHeadsQ__mh_w_963_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_963_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_963_equation_0, values = (var_5532_cast_fp16, var_5473_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_963_cast_fp16")]; tensor _SplitHeadsQ__mh_w_965_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_965_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_965_equation_0, values = (var_5532_cast_fp16, var_5474_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_965_cast_fp16")]; tensor _SplitHeadsQ__mh_w_967_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_967_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_967_equation_0, values = (var_5532_cast_fp16, var_5475_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_967_cast_fp16")]; tensor _SplitHeadsQ__mh_w_969_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_969_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_969_equation_0, values = (var_5532_cast_fp16, var_5476_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_969_cast_fp16")]; tensor _SplitHeadsQ__mh_w_971_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_971_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_971_equation_0, values = (var_5532_cast_fp16, var_5477_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_971_cast_fp16")]; tensor _SplitHeadsQ__mh_w_973_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_973_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_973_equation_0, values = (var_5536_cast_fp16, var_5478_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_973_cast_fp16")]; tensor _SplitHeadsQ__mh_w_975_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_975_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_975_equation_0, values = (var_5536_cast_fp16, var_5479_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_975_cast_fp16")]; tensor _SplitHeadsQ__mh_w_977_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_977_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_977_equation_0, values = (var_5536_cast_fp16, var_5480_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_977_cast_fp16")]; tensor _SplitHeadsQ__mh_w_979_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_979_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_979_equation_0, values = (var_5536_cast_fp16, var_5481_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_979_cast_fp16")]; tensor _SplitHeadsQ__mh_w_981_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_981_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_981_equation_0, values = (var_5536_cast_fp16, var_5482_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_981_cast_fp16")]; tensor _SplitHeadsQ__mh_w_983_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_983_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_983_equation_0, values = (var_5536_cast_fp16, var_5483_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_983_cast_fp16")]; tensor _SplitHeadsQ__mh_w_985_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_985_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_985_equation_0, values = (var_5540_cast_fp16, var_5484_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_985_cast_fp16")]; tensor _SplitHeadsQ__mh_w_987_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_987_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_987_equation_0, values = (var_5540_cast_fp16, var_5485_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_987_cast_fp16")]; tensor _SplitHeadsQ__mh_w_989_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_989_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_989_equation_0, values = (var_5540_cast_fp16, var_5486_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_989_cast_fp16")]; tensor _SplitHeadsQ__mh_w_991_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_991_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_991_equation_0, values = (var_5540_cast_fp16, var_5487_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_991_cast_fp16")]; tensor _SplitHeadsQ__mh_w_993_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_993_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_993_equation_0, values = (var_5540_cast_fp16, var_5488_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_993_cast_fp16")]; tensor _SplitHeadsQ__mh_w_995_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_995_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_995_equation_0, values = (var_5540_cast_fp16, var_5489_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_995_cast_fp16")]; tensor _SplitHeadsQ__mh_w_997_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_997_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_997_equation_0, values = (var_5544_cast_fp16, var_5490_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_997_cast_fp16")]; tensor _SplitHeadsQ__mh_w_999_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_999_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_999_equation_0, values = (var_5544_cast_fp16, var_5491_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_999_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1001_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1001_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1001_equation_0, values = (var_5544_cast_fp16, var_5492_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1001_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1003_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1003_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1003_equation_0, values = (var_5544_cast_fp16, var_5493_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1003_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1005_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1005_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1005_equation_0, values = (var_5544_cast_fp16, var_5494_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1005_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1007_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1007_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1007_equation_0, values = (var_5544_cast_fp16, var_5495_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1007_cast_fp16")]; tensor var_5737_to_fp16 = const()[name = tensor("op_5737_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_865_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_865_cast_fp16, y = var_5737_to_fp16)[name = tensor("aw_chunk_865_cast_fp16")]; tensor var_5739_to_fp16 = const()[name = tensor("op_5739_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_867_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_867_cast_fp16, y = var_5739_to_fp16)[name = tensor("aw_chunk_867_cast_fp16")]; tensor var_5741_to_fp16 = const()[name = tensor("op_5741_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_869_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_869_cast_fp16, y = var_5741_to_fp16)[name = tensor("aw_chunk_869_cast_fp16")]; tensor var_5743_to_fp16 = const()[name = tensor("op_5743_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_871_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_871_cast_fp16, y = var_5743_to_fp16)[name = tensor("aw_chunk_871_cast_fp16")]; tensor var_5745_to_fp16 = const()[name = tensor("op_5745_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_873_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_873_cast_fp16, y = var_5745_to_fp16)[name = tensor("aw_chunk_873_cast_fp16")]; tensor var_5747_to_fp16 = const()[name = tensor("op_5747_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_875_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_875_cast_fp16, y = var_5747_to_fp16)[name = tensor("aw_chunk_875_cast_fp16")]; tensor var_5749_to_fp16 = const()[name = tensor("op_5749_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_877_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_877_cast_fp16, y = var_5749_to_fp16)[name = tensor("aw_chunk_877_cast_fp16")]; tensor var_5751_to_fp16 = const()[name = tensor("op_5751_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_879_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_879_cast_fp16, y = var_5751_to_fp16)[name = tensor("aw_chunk_879_cast_fp16")]; tensor var_5753_to_fp16 = const()[name = tensor("op_5753_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_881_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_881_cast_fp16, y = var_5753_to_fp16)[name = tensor("aw_chunk_881_cast_fp16")]; tensor var_5755_to_fp16 = const()[name = tensor("op_5755_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_883_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_883_cast_fp16, y = var_5755_to_fp16)[name = tensor("aw_chunk_883_cast_fp16")]; tensor var_5757_to_fp16 = const()[name = tensor("op_5757_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_885_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_885_cast_fp16, y = var_5757_to_fp16)[name = tensor("aw_chunk_885_cast_fp16")]; tensor var_5759_to_fp16 = const()[name = tensor("op_5759_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_887_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_887_cast_fp16, y = var_5759_to_fp16)[name = tensor("aw_chunk_887_cast_fp16")]; tensor var_5761_to_fp16 = const()[name = tensor("op_5761_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_889_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_889_cast_fp16, y = var_5761_to_fp16)[name = tensor("aw_chunk_889_cast_fp16")]; tensor var_5763_to_fp16 = const()[name = tensor("op_5763_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_891_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_891_cast_fp16, y = var_5763_to_fp16)[name = tensor("aw_chunk_891_cast_fp16")]; tensor var_5765_to_fp16 = const()[name = tensor("op_5765_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_893_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_893_cast_fp16, y = var_5765_to_fp16)[name = tensor("aw_chunk_893_cast_fp16")]; tensor var_5767_to_fp16 = const()[name = tensor("op_5767_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_895_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_895_cast_fp16, y = var_5767_to_fp16)[name = tensor("aw_chunk_895_cast_fp16")]; tensor var_5769_to_fp16 = const()[name = tensor("op_5769_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_897_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_897_cast_fp16, y = var_5769_to_fp16)[name = tensor("aw_chunk_897_cast_fp16")]; tensor var_5771_to_fp16 = const()[name = tensor("op_5771_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_899_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_899_cast_fp16, y = var_5771_to_fp16)[name = tensor("aw_chunk_899_cast_fp16")]; tensor var_5773_to_fp16 = const()[name = tensor("op_5773_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_901_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_901_cast_fp16, y = var_5773_to_fp16)[name = tensor("aw_chunk_901_cast_fp16")]; tensor var_5775_to_fp16 = const()[name = tensor("op_5775_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_903_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_903_cast_fp16, y = var_5775_to_fp16)[name = tensor("aw_chunk_903_cast_fp16")]; tensor var_5777_to_fp16 = const()[name = tensor("op_5777_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_905_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_905_cast_fp16, y = var_5777_to_fp16)[name = tensor("aw_chunk_905_cast_fp16")]; tensor var_5779_to_fp16 = const()[name = tensor("op_5779_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_907_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_907_cast_fp16, y = var_5779_to_fp16)[name = tensor("aw_chunk_907_cast_fp16")]; tensor var_5781_to_fp16 = const()[name = tensor("op_5781_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_909_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_909_cast_fp16, y = var_5781_to_fp16)[name = tensor("aw_chunk_909_cast_fp16")]; tensor var_5783_to_fp16 = const()[name = tensor("op_5783_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_911_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_911_cast_fp16, y = var_5783_to_fp16)[name = tensor("aw_chunk_911_cast_fp16")]; tensor var_5785_to_fp16 = const()[name = tensor("op_5785_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_913_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_913_cast_fp16, y = var_5785_to_fp16)[name = tensor("aw_chunk_913_cast_fp16")]; tensor var_5787_to_fp16 = const()[name = tensor("op_5787_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_915_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_915_cast_fp16, y = var_5787_to_fp16)[name = tensor("aw_chunk_915_cast_fp16")]; tensor var_5789_to_fp16 = const()[name = tensor("op_5789_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_917_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_917_cast_fp16, y = var_5789_to_fp16)[name = tensor("aw_chunk_917_cast_fp16")]; tensor var_5791_to_fp16 = const()[name = tensor("op_5791_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_919_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_919_cast_fp16, y = var_5791_to_fp16)[name = tensor("aw_chunk_919_cast_fp16")]; tensor var_5793_to_fp16 = const()[name = tensor("op_5793_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_921_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_921_cast_fp16, y = var_5793_to_fp16)[name = tensor("aw_chunk_921_cast_fp16")]; tensor var_5795_to_fp16 = const()[name = tensor("op_5795_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_923_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_923_cast_fp16, y = var_5795_to_fp16)[name = tensor("aw_chunk_923_cast_fp16")]; tensor var_5797_to_fp16 = const()[name = tensor("op_5797_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_925_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_925_cast_fp16, y = var_5797_to_fp16)[name = tensor("aw_chunk_925_cast_fp16")]; tensor var_5799_to_fp16 = const()[name = tensor("op_5799_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_927_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_927_cast_fp16, y = var_5799_to_fp16)[name = tensor("aw_chunk_927_cast_fp16")]; tensor var_5801_to_fp16 = const()[name = tensor("op_5801_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_929_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_929_cast_fp16, y = var_5801_to_fp16)[name = tensor("aw_chunk_929_cast_fp16")]; tensor var_5803_to_fp16 = const()[name = tensor("op_5803_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_931_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_931_cast_fp16, y = var_5803_to_fp16)[name = tensor("aw_chunk_931_cast_fp16")]; tensor var_5805_to_fp16 = const()[name = tensor("op_5805_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_933_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_933_cast_fp16, y = var_5805_to_fp16)[name = tensor("aw_chunk_933_cast_fp16")]; tensor var_5807_to_fp16 = const()[name = tensor("op_5807_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_935_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_935_cast_fp16, y = var_5807_to_fp16)[name = tensor("aw_chunk_935_cast_fp16")]; tensor var_5809_to_fp16 = const()[name = tensor("op_5809_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_937_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_937_cast_fp16, y = var_5809_to_fp16)[name = tensor("aw_chunk_937_cast_fp16")]; tensor var_5811_to_fp16 = const()[name = tensor("op_5811_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_939_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_939_cast_fp16, y = var_5811_to_fp16)[name = tensor("aw_chunk_939_cast_fp16")]; tensor var_5813_to_fp16 = const()[name = tensor("op_5813_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_941_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_941_cast_fp16, y = var_5813_to_fp16)[name = tensor("aw_chunk_941_cast_fp16")]; tensor var_5815_to_fp16 = const()[name = tensor("op_5815_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_943_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_943_cast_fp16, y = var_5815_to_fp16)[name = tensor("aw_chunk_943_cast_fp16")]; tensor var_5817_to_fp16 = const()[name = tensor("op_5817_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_945_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_945_cast_fp16, y = var_5817_to_fp16)[name = tensor("aw_chunk_945_cast_fp16")]; tensor var_5819_to_fp16 = const()[name = tensor("op_5819_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_947_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_947_cast_fp16, y = var_5819_to_fp16)[name = tensor("aw_chunk_947_cast_fp16")]; tensor var_5821_to_fp16 = const()[name = tensor("op_5821_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_949_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_949_cast_fp16, y = var_5821_to_fp16)[name = tensor("aw_chunk_949_cast_fp16")]; tensor var_5823_to_fp16 = const()[name = tensor("op_5823_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_951_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_951_cast_fp16, y = var_5823_to_fp16)[name = tensor("aw_chunk_951_cast_fp16")]; tensor var_5825_to_fp16 = const()[name = tensor("op_5825_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_953_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_953_cast_fp16, y = var_5825_to_fp16)[name = tensor("aw_chunk_953_cast_fp16")]; tensor var_5827_to_fp16 = const()[name = tensor("op_5827_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_955_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_955_cast_fp16, y = var_5827_to_fp16)[name = tensor("aw_chunk_955_cast_fp16")]; tensor var_5829_to_fp16 = const()[name = tensor("op_5829_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_957_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_957_cast_fp16, y = var_5829_to_fp16)[name = tensor("aw_chunk_957_cast_fp16")]; tensor var_5831_to_fp16 = const()[name = tensor("op_5831_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_959_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_959_cast_fp16, y = var_5831_to_fp16)[name = tensor("aw_chunk_959_cast_fp16")]; tensor var_5833_to_fp16 = const()[name = tensor("op_5833_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_961_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_961_cast_fp16, y = var_5833_to_fp16)[name = tensor("aw_chunk_961_cast_fp16")]; tensor var_5835_to_fp16 = const()[name = tensor("op_5835_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_963_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_963_cast_fp16, y = var_5835_to_fp16)[name = tensor("aw_chunk_963_cast_fp16")]; tensor var_5837_to_fp16 = const()[name = tensor("op_5837_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_965_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_965_cast_fp16, y = var_5837_to_fp16)[name = tensor("aw_chunk_965_cast_fp16")]; tensor var_5839_to_fp16 = const()[name = tensor("op_5839_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_967_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_967_cast_fp16, y = var_5839_to_fp16)[name = tensor("aw_chunk_967_cast_fp16")]; tensor var_5841_to_fp16 = const()[name = tensor("op_5841_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_969_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_969_cast_fp16, y = var_5841_to_fp16)[name = tensor("aw_chunk_969_cast_fp16")]; tensor var_5843_to_fp16 = const()[name = tensor("op_5843_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_971_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_971_cast_fp16, y = var_5843_to_fp16)[name = tensor("aw_chunk_971_cast_fp16")]; tensor var_5845_to_fp16 = const()[name = tensor("op_5845_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_973_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_973_cast_fp16, y = var_5845_to_fp16)[name = tensor("aw_chunk_973_cast_fp16")]; tensor var_5847_to_fp16 = const()[name = tensor("op_5847_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_975_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_975_cast_fp16, y = var_5847_to_fp16)[name = tensor("aw_chunk_975_cast_fp16")]; tensor var_5849_to_fp16 = const()[name = tensor("op_5849_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_977_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_977_cast_fp16, y = var_5849_to_fp16)[name = tensor("aw_chunk_977_cast_fp16")]; tensor var_5851_to_fp16 = const()[name = tensor("op_5851_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_979_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_979_cast_fp16, y = var_5851_to_fp16)[name = tensor("aw_chunk_979_cast_fp16")]; tensor var_5853_to_fp16 = const()[name = tensor("op_5853_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_981_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_981_cast_fp16, y = var_5853_to_fp16)[name = tensor("aw_chunk_981_cast_fp16")]; tensor var_5855_to_fp16 = const()[name = tensor("op_5855_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_983_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_983_cast_fp16, y = var_5855_to_fp16)[name = tensor("aw_chunk_983_cast_fp16")]; tensor var_5857_to_fp16 = const()[name = tensor("op_5857_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_985_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_985_cast_fp16, y = var_5857_to_fp16)[name = tensor("aw_chunk_985_cast_fp16")]; tensor var_5859_to_fp16 = const()[name = tensor("op_5859_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_987_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_987_cast_fp16, y = var_5859_to_fp16)[name = tensor("aw_chunk_987_cast_fp16")]; tensor var_5861_to_fp16 = const()[name = tensor("op_5861_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_989_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_989_cast_fp16, y = var_5861_to_fp16)[name = tensor("aw_chunk_989_cast_fp16")]; tensor var_5863_to_fp16 = const()[name = tensor("op_5863_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_991_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_991_cast_fp16, y = var_5863_to_fp16)[name = tensor("aw_chunk_991_cast_fp16")]; tensor var_5865_to_fp16 = const()[name = tensor("op_5865_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_993_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_993_cast_fp16, y = var_5865_to_fp16)[name = tensor("aw_chunk_993_cast_fp16")]; tensor var_5867_to_fp16 = const()[name = tensor("op_5867_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_995_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_995_cast_fp16, y = var_5867_to_fp16)[name = tensor("aw_chunk_995_cast_fp16")]; tensor var_5869_to_fp16 = const()[name = tensor("op_5869_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_997_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_997_cast_fp16, y = var_5869_to_fp16)[name = tensor("aw_chunk_997_cast_fp16")]; tensor var_5871_to_fp16 = const()[name = tensor("op_5871_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_999_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_999_cast_fp16, y = var_5871_to_fp16)[name = tensor("aw_chunk_999_cast_fp16")]; tensor var_5873_to_fp16 = const()[name = tensor("op_5873_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1001_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1001_cast_fp16, y = var_5873_to_fp16)[name = tensor("aw_chunk_1001_cast_fp16")]; tensor var_5875_to_fp16 = const()[name = tensor("op_5875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1003_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1003_cast_fp16, y = var_5875_to_fp16)[name = tensor("aw_chunk_1003_cast_fp16")]; tensor var_5877_to_fp16 = const()[name = tensor("op_5877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1005_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1005_cast_fp16, y = var_5877_to_fp16)[name = tensor("aw_chunk_1005_cast_fp16")]; tensor var_5879_to_fp16 = const()[name = tensor("op_5879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1007_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1007_cast_fp16, y = var_5879_to_fp16)[name = tensor("aw_chunk_1007_cast_fp16")]; tensor var_5881_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_865_cast_fp16)[name = tensor("op_5881_cast_fp16")]; tensor var_5882_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_867_cast_fp16)[name = tensor("op_5882_cast_fp16")]; tensor var_5883_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_869_cast_fp16)[name = tensor("op_5883_cast_fp16")]; tensor var_5884_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_871_cast_fp16)[name = tensor("op_5884_cast_fp16")]; tensor var_5885_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_873_cast_fp16)[name = tensor("op_5885_cast_fp16")]; tensor var_5886_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_875_cast_fp16)[name = tensor("op_5886_cast_fp16")]; tensor var_5887_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_877_cast_fp16)[name = tensor("op_5887_cast_fp16")]; tensor var_5888_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_879_cast_fp16)[name = tensor("op_5888_cast_fp16")]; tensor var_5889_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_881_cast_fp16)[name = tensor("op_5889_cast_fp16")]; tensor var_5890_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_883_cast_fp16)[name = tensor("op_5890_cast_fp16")]; tensor var_5891_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_885_cast_fp16)[name = tensor("op_5891_cast_fp16")]; tensor var_5892_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_887_cast_fp16)[name = tensor("op_5892_cast_fp16")]; tensor var_5893_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_889_cast_fp16)[name = tensor("op_5893_cast_fp16")]; tensor var_5894_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_891_cast_fp16)[name = tensor("op_5894_cast_fp16")]; tensor var_5895_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_893_cast_fp16)[name = tensor("op_5895_cast_fp16")]; tensor var_5896_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_895_cast_fp16)[name = tensor("op_5896_cast_fp16")]; tensor var_5897_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_897_cast_fp16)[name = tensor("op_5897_cast_fp16")]; tensor var_5898_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_899_cast_fp16)[name = tensor("op_5898_cast_fp16")]; tensor var_5899_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_901_cast_fp16)[name = tensor("op_5899_cast_fp16")]; tensor var_5900_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_903_cast_fp16)[name = tensor("op_5900_cast_fp16")]; tensor var_5901_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_905_cast_fp16)[name = tensor("op_5901_cast_fp16")]; tensor var_5902_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_907_cast_fp16)[name = tensor("op_5902_cast_fp16")]; tensor var_5903_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_909_cast_fp16)[name = tensor("op_5903_cast_fp16")]; tensor var_5904_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_911_cast_fp16)[name = tensor("op_5904_cast_fp16")]; tensor var_5905_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_913_cast_fp16)[name = tensor("op_5905_cast_fp16")]; tensor var_5906_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_915_cast_fp16)[name = tensor("op_5906_cast_fp16")]; tensor var_5907_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_917_cast_fp16)[name = tensor("op_5907_cast_fp16")]; tensor var_5908_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_919_cast_fp16)[name = tensor("op_5908_cast_fp16")]; tensor var_5909_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_921_cast_fp16)[name = tensor("op_5909_cast_fp16")]; tensor var_5910_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_923_cast_fp16)[name = tensor("op_5910_cast_fp16")]; tensor var_5911_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_925_cast_fp16)[name = tensor("op_5911_cast_fp16")]; tensor var_5912_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_927_cast_fp16)[name = tensor("op_5912_cast_fp16")]; tensor var_5913_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_929_cast_fp16)[name = tensor("op_5913_cast_fp16")]; tensor var_5914_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_931_cast_fp16)[name = tensor("op_5914_cast_fp16")]; tensor var_5915_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_933_cast_fp16)[name = tensor("op_5915_cast_fp16")]; tensor var_5916_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_935_cast_fp16)[name = tensor("op_5916_cast_fp16")]; tensor var_5917_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_937_cast_fp16)[name = tensor("op_5917_cast_fp16")]; tensor var_5918_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_939_cast_fp16)[name = tensor("op_5918_cast_fp16")]; tensor var_5919_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_941_cast_fp16)[name = tensor("op_5919_cast_fp16")]; tensor var_5920_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_943_cast_fp16)[name = tensor("op_5920_cast_fp16")]; tensor var_5921_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_945_cast_fp16)[name = tensor("op_5921_cast_fp16")]; tensor var_5922_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_947_cast_fp16)[name = tensor("op_5922_cast_fp16")]; tensor var_5923_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_949_cast_fp16)[name = tensor("op_5923_cast_fp16")]; tensor var_5924_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_951_cast_fp16)[name = tensor("op_5924_cast_fp16")]; tensor var_5925_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_953_cast_fp16)[name = tensor("op_5925_cast_fp16")]; tensor var_5926_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_955_cast_fp16)[name = tensor("op_5926_cast_fp16")]; tensor var_5927_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_957_cast_fp16)[name = tensor("op_5927_cast_fp16")]; tensor var_5928_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_959_cast_fp16)[name = tensor("op_5928_cast_fp16")]; tensor var_5929_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_961_cast_fp16)[name = tensor("op_5929_cast_fp16")]; tensor var_5930_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_963_cast_fp16)[name = tensor("op_5930_cast_fp16")]; tensor var_5931_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_965_cast_fp16)[name = tensor("op_5931_cast_fp16")]; tensor var_5932_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_967_cast_fp16)[name = tensor("op_5932_cast_fp16")]; tensor var_5933_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_969_cast_fp16)[name = tensor("op_5933_cast_fp16")]; tensor var_5934_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_971_cast_fp16)[name = tensor("op_5934_cast_fp16")]; tensor var_5935_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_973_cast_fp16)[name = tensor("op_5935_cast_fp16")]; tensor var_5936_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_975_cast_fp16)[name = tensor("op_5936_cast_fp16")]; tensor var_5937_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_977_cast_fp16)[name = tensor("op_5937_cast_fp16")]; tensor var_5938_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_979_cast_fp16)[name = tensor("op_5938_cast_fp16")]; tensor var_5939_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_981_cast_fp16)[name = tensor("op_5939_cast_fp16")]; tensor var_5940_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_983_cast_fp16)[name = tensor("op_5940_cast_fp16")]; tensor var_5941_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_985_cast_fp16)[name = tensor("op_5941_cast_fp16")]; tensor var_5942_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_987_cast_fp16)[name = tensor("op_5942_cast_fp16")]; tensor var_5943_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_989_cast_fp16)[name = tensor("op_5943_cast_fp16")]; tensor var_5944_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_991_cast_fp16)[name = tensor("op_5944_cast_fp16")]; tensor var_5945_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_993_cast_fp16)[name = tensor("op_5945_cast_fp16")]; tensor var_5946_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_995_cast_fp16)[name = tensor("op_5946_cast_fp16")]; tensor var_5947_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_997_cast_fp16)[name = tensor("op_5947_cast_fp16")]; tensor var_5948_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_999_cast_fp16)[name = tensor("op_5948_cast_fp16")]; tensor var_5949_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_1001_cast_fp16)[name = tensor("op_5949_cast_fp16")]; tensor var_5950_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_1003_cast_fp16)[name = tensor("op_5950_cast_fp16")]; tensor var_5951_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_1005_cast_fp16)[name = tensor("op_5951_cast_fp16")]; tensor var_5952_cast_fp16 = softmax(axis = var_5325, x = aw_chunk_1007_cast_fp16)[name = tensor("op_5952_cast_fp16")]; tensor var_5954_equation_0 = const()[name = tensor("op_5954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5954_cast_fp16 = einsum(equation = var_5954_equation_0, values = (var_5546_cast_fp16, var_5881_cast_fp16))[name = tensor("op_5954_cast_fp16")]; tensor var_5956_equation_0 = const()[name = tensor("op_5956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5956_cast_fp16 = einsum(equation = var_5956_equation_0, values = (var_5546_cast_fp16, var_5882_cast_fp16))[name = tensor("op_5956_cast_fp16")]; tensor var_5958_equation_0 = const()[name = tensor("op_5958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5958_cast_fp16 = einsum(equation = var_5958_equation_0, values = (var_5546_cast_fp16, var_5883_cast_fp16))[name = tensor("op_5958_cast_fp16")]; tensor var_5960_equation_0 = const()[name = tensor("op_5960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5960_cast_fp16 = einsum(equation = var_5960_equation_0, values = (var_5546_cast_fp16, var_5884_cast_fp16))[name = tensor("op_5960_cast_fp16")]; tensor var_5962_equation_0 = const()[name = tensor("op_5962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5962_cast_fp16 = einsum(equation = var_5962_equation_0, values = (var_5546_cast_fp16, var_5885_cast_fp16))[name = tensor("op_5962_cast_fp16")]; tensor var_5964_equation_0 = const()[name = tensor("op_5964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5964_cast_fp16 = einsum(equation = var_5964_equation_0, values = (var_5546_cast_fp16, var_5886_cast_fp16))[name = tensor("op_5964_cast_fp16")]; tensor var_5966_equation_0 = const()[name = tensor("op_5966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5966_cast_fp16 = einsum(equation = var_5966_equation_0, values = (var_5550_cast_fp16, var_5887_cast_fp16))[name = tensor("op_5966_cast_fp16")]; tensor var_5968_equation_0 = const()[name = tensor("op_5968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5968_cast_fp16 = einsum(equation = var_5968_equation_0, values = (var_5550_cast_fp16, var_5888_cast_fp16))[name = tensor("op_5968_cast_fp16")]; tensor var_5970_equation_0 = const()[name = tensor("op_5970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5970_cast_fp16 = einsum(equation = var_5970_equation_0, values = (var_5550_cast_fp16, var_5889_cast_fp16))[name = tensor("op_5970_cast_fp16")]; tensor var_5972_equation_0 = const()[name = tensor("op_5972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5972_cast_fp16 = einsum(equation = var_5972_equation_0, values = (var_5550_cast_fp16, var_5890_cast_fp16))[name = tensor("op_5972_cast_fp16")]; tensor var_5974_equation_0 = const()[name = tensor("op_5974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5974_cast_fp16 = einsum(equation = var_5974_equation_0, values = (var_5550_cast_fp16, var_5891_cast_fp16))[name = tensor("op_5974_cast_fp16")]; tensor var_5976_equation_0 = const()[name = tensor("op_5976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5976_cast_fp16 = einsum(equation = var_5976_equation_0, values = (var_5550_cast_fp16, var_5892_cast_fp16))[name = tensor("op_5976_cast_fp16")]; tensor var_5978_equation_0 = const()[name = tensor("op_5978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5978_cast_fp16 = einsum(equation = var_5978_equation_0, values = (var_5554_cast_fp16, var_5893_cast_fp16))[name = tensor("op_5978_cast_fp16")]; tensor var_5980_equation_0 = const()[name = tensor("op_5980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5980_cast_fp16 = einsum(equation = var_5980_equation_0, values = (var_5554_cast_fp16, var_5894_cast_fp16))[name = tensor("op_5980_cast_fp16")]; tensor var_5982_equation_0 = const()[name = tensor("op_5982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5982_cast_fp16 = einsum(equation = var_5982_equation_0, values = (var_5554_cast_fp16, var_5895_cast_fp16))[name = tensor("op_5982_cast_fp16")]; tensor var_5984_equation_0 = const()[name = tensor("op_5984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5984_cast_fp16 = einsum(equation = var_5984_equation_0, values = (var_5554_cast_fp16, var_5896_cast_fp16))[name = tensor("op_5984_cast_fp16")]; tensor var_5986_equation_0 = const()[name = tensor("op_5986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5986_cast_fp16 = einsum(equation = var_5986_equation_0, values = (var_5554_cast_fp16, var_5897_cast_fp16))[name = tensor("op_5986_cast_fp16")]; tensor var_5988_equation_0 = const()[name = tensor("op_5988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5988_cast_fp16 = einsum(equation = var_5988_equation_0, values = (var_5554_cast_fp16, var_5898_cast_fp16))[name = tensor("op_5988_cast_fp16")]; tensor var_5990_equation_0 = const()[name = tensor("op_5990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5990_cast_fp16 = einsum(equation = var_5990_equation_0, values = (var_5558_cast_fp16, var_5899_cast_fp16))[name = tensor("op_5990_cast_fp16")]; tensor var_5992_equation_0 = const()[name = tensor("op_5992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5992_cast_fp16 = einsum(equation = var_5992_equation_0, values = (var_5558_cast_fp16, var_5900_cast_fp16))[name = tensor("op_5992_cast_fp16")]; tensor var_5994_equation_0 = const()[name = tensor("op_5994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5994_cast_fp16 = einsum(equation = var_5994_equation_0, values = (var_5558_cast_fp16, var_5901_cast_fp16))[name = tensor("op_5994_cast_fp16")]; tensor var_5996_equation_0 = const()[name = tensor("op_5996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5996_cast_fp16 = einsum(equation = var_5996_equation_0, values = (var_5558_cast_fp16, var_5902_cast_fp16))[name = tensor("op_5996_cast_fp16")]; tensor var_5998_equation_0 = const()[name = tensor("op_5998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5998_cast_fp16 = einsum(equation = var_5998_equation_0, values = (var_5558_cast_fp16, var_5903_cast_fp16))[name = tensor("op_5998_cast_fp16")]; tensor var_6000_equation_0 = const()[name = tensor("op_6000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6000_cast_fp16 = einsum(equation = var_6000_equation_0, values = (var_5558_cast_fp16, var_5904_cast_fp16))[name = tensor("op_6000_cast_fp16")]; tensor var_6002_equation_0 = const()[name = tensor("op_6002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6002_cast_fp16 = einsum(equation = var_6002_equation_0, values = (var_5562_cast_fp16, var_5905_cast_fp16))[name = tensor("op_6002_cast_fp16")]; tensor var_6004_equation_0 = const()[name = tensor("op_6004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6004_cast_fp16 = einsum(equation = var_6004_equation_0, values = (var_5562_cast_fp16, var_5906_cast_fp16))[name = tensor("op_6004_cast_fp16")]; tensor var_6006_equation_0 = const()[name = tensor("op_6006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6006_cast_fp16 = einsum(equation = var_6006_equation_0, values = (var_5562_cast_fp16, var_5907_cast_fp16))[name = tensor("op_6006_cast_fp16")]; tensor var_6008_equation_0 = const()[name = tensor("op_6008_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6008_cast_fp16 = einsum(equation = var_6008_equation_0, values = (var_5562_cast_fp16, var_5908_cast_fp16))[name = tensor("op_6008_cast_fp16")]; tensor var_6010_equation_0 = const()[name = tensor("op_6010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6010_cast_fp16 = einsum(equation = var_6010_equation_0, values = (var_5562_cast_fp16, var_5909_cast_fp16))[name = tensor("op_6010_cast_fp16")]; tensor var_6012_equation_0 = const()[name = tensor("op_6012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6012_cast_fp16 = einsum(equation = var_6012_equation_0, values = (var_5562_cast_fp16, var_5910_cast_fp16))[name = tensor("op_6012_cast_fp16")]; tensor var_6014_equation_0 = const()[name = tensor("op_6014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6014_cast_fp16 = einsum(equation = var_6014_equation_0, values = (var_5566_cast_fp16, var_5911_cast_fp16))[name = tensor("op_6014_cast_fp16")]; tensor var_6016_equation_0 = const()[name = tensor("op_6016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6016_cast_fp16 = einsum(equation = var_6016_equation_0, values = (var_5566_cast_fp16, var_5912_cast_fp16))[name = tensor("op_6016_cast_fp16")]; tensor var_6018_equation_0 = const()[name = tensor("op_6018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6018_cast_fp16 = einsum(equation = var_6018_equation_0, values = (var_5566_cast_fp16, var_5913_cast_fp16))[name = tensor("op_6018_cast_fp16")]; tensor var_6020_equation_0 = const()[name = tensor("op_6020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6020_cast_fp16 = einsum(equation = var_6020_equation_0, values = (var_5566_cast_fp16, var_5914_cast_fp16))[name = tensor("op_6020_cast_fp16")]; tensor var_6022_equation_0 = const()[name = tensor("op_6022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6022_cast_fp16 = einsum(equation = var_6022_equation_0, values = (var_5566_cast_fp16, var_5915_cast_fp16))[name = tensor("op_6022_cast_fp16")]; tensor var_6024_equation_0 = const()[name = tensor("op_6024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6024_cast_fp16 = einsum(equation = var_6024_equation_0, values = (var_5566_cast_fp16, var_5916_cast_fp16))[name = tensor("op_6024_cast_fp16")]; tensor var_6026_equation_0 = const()[name = tensor("op_6026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6026_cast_fp16 = einsum(equation = var_6026_equation_0, values = (var_5570_cast_fp16, var_5917_cast_fp16))[name = tensor("op_6026_cast_fp16")]; tensor var_6028_equation_0 = const()[name = tensor("op_6028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6028_cast_fp16 = einsum(equation = var_6028_equation_0, values = (var_5570_cast_fp16, var_5918_cast_fp16))[name = tensor("op_6028_cast_fp16")]; tensor var_6030_equation_0 = const()[name = tensor("op_6030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6030_cast_fp16 = einsum(equation = var_6030_equation_0, values = (var_5570_cast_fp16, var_5919_cast_fp16))[name = tensor("op_6030_cast_fp16")]; tensor var_6032_equation_0 = const()[name = tensor("op_6032_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6032_cast_fp16 = einsum(equation = var_6032_equation_0, values = (var_5570_cast_fp16, var_5920_cast_fp16))[name = tensor("op_6032_cast_fp16")]; tensor var_6034_equation_0 = const()[name = tensor("op_6034_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6034_cast_fp16 = einsum(equation = var_6034_equation_0, values = (var_5570_cast_fp16, var_5921_cast_fp16))[name = tensor("op_6034_cast_fp16")]; tensor var_6036_equation_0 = const()[name = tensor("op_6036_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6036_cast_fp16 = einsum(equation = var_6036_equation_0, values = (var_5570_cast_fp16, var_5922_cast_fp16))[name = tensor("op_6036_cast_fp16")]; tensor var_6038_equation_0 = const()[name = tensor("op_6038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6038_cast_fp16 = einsum(equation = var_6038_equation_0, values = (var_5574_cast_fp16, var_5923_cast_fp16))[name = tensor("op_6038_cast_fp16")]; tensor var_6040_equation_0 = const()[name = tensor("op_6040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6040_cast_fp16 = einsum(equation = var_6040_equation_0, values = (var_5574_cast_fp16, var_5924_cast_fp16))[name = tensor("op_6040_cast_fp16")]; tensor var_6042_equation_0 = const()[name = tensor("op_6042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6042_cast_fp16 = einsum(equation = var_6042_equation_0, values = (var_5574_cast_fp16, var_5925_cast_fp16))[name = tensor("op_6042_cast_fp16")]; tensor var_6044_equation_0 = const()[name = tensor("op_6044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6044_cast_fp16 = einsum(equation = var_6044_equation_0, values = (var_5574_cast_fp16, var_5926_cast_fp16))[name = tensor("op_6044_cast_fp16")]; tensor var_6046_equation_0 = const()[name = tensor("op_6046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6046_cast_fp16 = einsum(equation = var_6046_equation_0, values = (var_5574_cast_fp16, var_5927_cast_fp16))[name = tensor("op_6046_cast_fp16")]; tensor var_6048_equation_0 = const()[name = tensor("op_6048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6048_cast_fp16 = einsum(equation = var_6048_equation_0, values = (var_5574_cast_fp16, var_5928_cast_fp16))[name = tensor("op_6048_cast_fp16")]; tensor var_6050_equation_0 = const()[name = tensor("op_6050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6050_cast_fp16 = einsum(equation = var_6050_equation_0, values = (var_5578_cast_fp16, var_5929_cast_fp16))[name = tensor("op_6050_cast_fp16")]; tensor var_6052_equation_0 = const()[name = tensor("op_6052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6052_cast_fp16 = einsum(equation = var_6052_equation_0, values = (var_5578_cast_fp16, var_5930_cast_fp16))[name = tensor("op_6052_cast_fp16")]; tensor var_6054_equation_0 = const()[name = tensor("op_6054_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6054_cast_fp16 = einsum(equation = var_6054_equation_0, values = (var_5578_cast_fp16, var_5931_cast_fp16))[name = tensor("op_6054_cast_fp16")]; tensor var_6056_equation_0 = const()[name = tensor("op_6056_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6056_cast_fp16 = einsum(equation = var_6056_equation_0, values = (var_5578_cast_fp16, var_5932_cast_fp16))[name = tensor("op_6056_cast_fp16")]; tensor var_6058_equation_0 = const()[name = tensor("op_6058_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6058_cast_fp16 = einsum(equation = var_6058_equation_0, values = (var_5578_cast_fp16, var_5933_cast_fp16))[name = tensor("op_6058_cast_fp16")]; tensor var_6060_equation_0 = const()[name = tensor("op_6060_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6060_cast_fp16 = einsum(equation = var_6060_equation_0, values = (var_5578_cast_fp16, var_5934_cast_fp16))[name = tensor("op_6060_cast_fp16")]; tensor var_6062_equation_0 = const()[name = tensor("op_6062_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6062_cast_fp16 = einsum(equation = var_6062_equation_0, values = (var_5582_cast_fp16, var_5935_cast_fp16))[name = tensor("op_6062_cast_fp16")]; tensor var_6064_equation_0 = const()[name = tensor("op_6064_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6064_cast_fp16 = einsum(equation = var_6064_equation_0, values = (var_5582_cast_fp16, var_5936_cast_fp16))[name = tensor("op_6064_cast_fp16")]; tensor var_6066_equation_0 = const()[name = tensor("op_6066_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6066_cast_fp16 = einsum(equation = var_6066_equation_0, values = (var_5582_cast_fp16, var_5937_cast_fp16))[name = tensor("op_6066_cast_fp16")]; tensor var_6068_equation_0 = const()[name = tensor("op_6068_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6068_cast_fp16 = einsum(equation = var_6068_equation_0, values = (var_5582_cast_fp16, var_5938_cast_fp16))[name = tensor("op_6068_cast_fp16")]; tensor var_6070_equation_0 = const()[name = tensor("op_6070_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6070_cast_fp16 = einsum(equation = var_6070_equation_0, values = (var_5582_cast_fp16, var_5939_cast_fp16))[name = tensor("op_6070_cast_fp16")]; tensor var_6072_equation_0 = const()[name = tensor("op_6072_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6072_cast_fp16 = einsum(equation = var_6072_equation_0, values = (var_5582_cast_fp16, var_5940_cast_fp16))[name = tensor("op_6072_cast_fp16")]; tensor var_6074_equation_0 = const()[name = tensor("op_6074_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6074_cast_fp16 = einsum(equation = var_6074_equation_0, values = (var_5586_cast_fp16, var_5941_cast_fp16))[name = tensor("op_6074_cast_fp16")]; tensor var_6076_equation_0 = const()[name = tensor("op_6076_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6076_cast_fp16 = einsum(equation = var_6076_equation_0, values = (var_5586_cast_fp16, var_5942_cast_fp16))[name = tensor("op_6076_cast_fp16")]; tensor var_6078_equation_0 = const()[name = tensor("op_6078_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6078_cast_fp16 = einsum(equation = var_6078_equation_0, values = (var_5586_cast_fp16, var_5943_cast_fp16))[name = tensor("op_6078_cast_fp16")]; tensor var_6080_equation_0 = const()[name = tensor("op_6080_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6080_cast_fp16 = einsum(equation = var_6080_equation_0, values = (var_5586_cast_fp16, var_5944_cast_fp16))[name = tensor("op_6080_cast_fp16")]; tensor var_6082_equation_0 = const()[name = tensor("op_6082_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6082_cast_fp16 = einsum(equation = var_6082_equation_0, values = (var_5586_cast_fp16, var_5945_cast_fp16))[name = tensor("op_6082_cast_fp16")]; tensor var_6084_equation_0 = const()[name = tensor("op_6084_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6084_cast_fp16 = einsum(equation = var_6084_equation_0, values = (var_5586_cast_fp16, var_5946_cast_fp16))[name = tensor("op_6084_cast_fp16")]; tensor var_6086_equation_0 = const()[name = tensor("op_6086_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6086_cast_fp16 = einsum(equation = var_6086_equation_0, values = (var_5590_cast_fp16, var_5947_cast_fp16))[name = tensor("op_6086_cast_fp16")]; tensor var_6088_equation_0 = const()[name = tensor("op_6088_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6088_cast_fp16 = einsum(equation = var_6088_equation_0, values = (var_5590_cast_fp16, var_5948_cast_fp16))[name = tensor("op_6088_cast_fp16")]; tensor var_6090_equation_0 = const()[name = tensor("op_6090_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6090_cast_fp16 = einsum(equation = var_6090_equation_0, values = (var_5590_cast_fp16, var_5949_cast_fp16))[name = tensor("op_6090_cast_fp16")]; tensor var_6092_equation_0 = const()[name = tensor("op_6092_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6092_cast_fp16 = einsum(equation = var_6092_equation_0, values = (var_5590_cast_fp16, var_5950_cast_fp16))[name = tensor("op_6092_cast_fp16")]; tensor var_6094_equation_0 = const()[name = tensor("op_6094_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6094_cast_fp16 = einsum(equation = var_6094_equation_0, values = (var_5590_cast_fp16, var_5951_cast_fp16))[name = tensor("op_6094_cast_fp16")]; tensor var_6096_equation_0 = const()[name = tensor("op_6096_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6096_cast_fp16 = einsum(equation = var_6096_equation_0, values = (var_5590_cast_fp16, var_5952_cast_fp16))[name = tensor("op_6096_cast_fp16")]; tensor var_6098_interleave_0 = const()[name = tensor("op_6098_interleave_0"), val = tensor(false)]; tensor var_6098_cast_fp16 = concat(axis = var_5309, interleave = var_6098_interleave_0, values = (var_5954_cast_fp16, var_5956_cast_fp16, var_5958_cast_fp16, var_5960_cast_fp16, var_5962_cast_fp16, var_5964_cast_fp16))[name = tensor("op_6098_cast_fp16")]; tensor var_6100_interleave_0 = const()[name = tensor("op_6100_interleave_0"), val = tensor(false)]; tensor var_6100_cast_fp16 = concat(axis = var_5309, interleave = var_6100_interleave_0, values = (var_5966_cast_fp16, var_5968_cast_fp16, var_5970_cast_fp16, var_5972_cast_fp16, var_5974_cast_fp16, var_5976_cast_fp16))[name = tensor("op_6100_cast_fp16")]; tensor var_6102_interleave_0 = const()[name = tensor("op_6102_interleave_0"), val = tensor(false)]; tensor var_6102_cast_fp16 = concat(axis = var_5309, interleave = var_6102_interleave_0, values = (var_5978_cast_fp16, var_5980_cast_fp16, var_5982_cast_fp16, var_5984_cast_fp16, var_5986_cast_fp16, var_5988_cast_fp16))[name = tensor("op_6102_cast_fp16")]; tensor var_6104_interleave_0 = const()[name = tensor("op_6104_interleave_0"), val = tensor(false)]; tensor var_6104_cast_fp16 = concat(axis = var_5309, interleave = var_6104_interleave_0, values = (var_5990_cast_fp16, var_5992_cast_fp16, var_5994_cast_fp16, var_5996_cast_fp16, var_5998_cast_fp16, var_6000_cast_fp16))[name = tensor("op_6104_cast_fp16")]; tensor var_6106_interleave_0 = const()[name = tensor("op_6106_interleave_0"), val = tensor(false)]; tensor var_6106_cast_fp16 = concat(axis = var_5309, interleave = var_6106_interleave_0, values = (var_6002_cast_fp16, var_6004_cast_fp16, var_6006_cast_fp16, var_6008_cast_fp16, var_6010_cast_fp16, var_6012_cast_fp16))[name = tensor("op_6106_cast_fp16")]; tensor var_6108_interleave_0 = const()[name = tensor("op_6108_interleave_0"), val = tensor(false)]; tensor var_6108_cast_fp16 = concat(axis = var_5309, interleave = var_6108_interleave_0, values = (var_6014_cast_fp16, var_6016_cast_fp16, var_6018_cast_fp16, var_6020_cast_fp16, var_6022_cast_fp16, var_6024_cast_fp16))[name = tensor("op_6108_cast_fp16")]; tensor var_6110_interleave_0 = const()[name = tensor("op_6110_interleave_0"), val = tensor(false)]; tensor var_6110_cast_fp16 = concat(axis = var_5309, interleave = var_6110_interleave_0, values = (var_6026_cast_fp16, var_6028_cast_fp16, var_6030_cast_fp16, var_6032_cast_fp16, var_6034_cast_fp16, var_6036_cast_fp16))[name = tensor("op_6110_cast_fp16")]; tensor var_6112_interleave_0 = const()[name = tensor("op_6112_interleave_0"), val = tensor(false)]; tensor var_6112_cast_fp16 = concat(axis = var_5309, interleave = var_6112_interleave_0, values = (var_6038_cast_fp16, var_6040_cast_fp16, var_6042_cast_fp16, var_6044_cast_fp16, var_6046_cast_fp16, var_6048_cast_fp16))[name = tensor("op_6112_cast_fp16")]; tensor var_6114_interleave_0 = const()[name = tensor("op_6114_interleave_0"), val = tensor(false)]; tensor var_6114_cast_fp16 = concat(axis = var_5309, interleave = var_6114_interleave_0, values = (var_6050_cast_fp16, var_6052_cast_fp16, var_6054_cast_fp16, var_6056_cast_fp16, var_6058_cast_fp16, var_6060_cast_fp16))[name = tensor("op_6114_cast_fp16")]; tensor var_6116_interleave_0 = const()[name = tensor("op_6116_interleave_0"), val = tensor(false)]; tensor var_6116_cast_fp16 = concat(axis = var_5309, interleave = var_6116_interleave_0, values = (var_6062_cast_fp16, var_6064_cast_fp16, var_6066_cast_fp16, var_6068_cast_fp16, var_6070_cast_fp16, var_6072_cast_fp16))[name = tensor("op_6116_cast_fp16")]; tensor var_6118_interleave_0 = const()[name = tensor("op_6118_interleave_0"), val = tensor(false)]; tensor var_6118_cast_fp16 = concat(axis = var_5309, interleave = var_6118_interleave_0, values = (var_6074_cast_fp16, var_6076_cast_fp16, var_6078_cast_fp16, var_6080_cast_fp16, var_6082_cast_fp16, var_6084_cast_fp16))[name = tensor("op_6118_cast_fp16")]; tensor var_6120_interleave_0 = const()[name = tensor("op_6120_interleave_0"), val = tensor(false)]; tensor var_6120_cast_fp16 = concat(axis = var_5309, interleave = var_6120_interleave_0, values = (var_6086_cast_fp16, var_6088_cast_fp16, var_6090_cast_fp16, var_6092_cast_fp16, var_6094_cast_fp16, var_6096_cast_fp16))[name = tensor("op_6120_cast_fp16")]; tensor input_49_interleave_0 = const()[name = tensor("input_49_interleave_0"), val = tensor(false)]; tensor input_49_cast_fp16 = concat(axis = var_5325, interleave = input_49_interleave_0, values = (var_6098_cast_fp16, var_6100_cast_fp16, var_6102_cast_fp16, var_6104_cast_fp16, var_6106_cast_fp16, var_6108_cast_fp16, var_6110_cast_fp16, var_6112_cast_fp16, var_6114_cast_fp16, var_6116_cast_fp16, var_6118_cast_fp16, var_6120_cast_fp16))[name = tensor("input_49_cast_fp16")]; tensor obj_27_pad_type_0 = const()[name = tensor("obj_27_pad_type_0"), val = tensor("valid")]; tensor obj_27_strides_0 = const()[name = tensor("obj_27_strides_0"), val = tensor([1, 1])]; tensor obj_27_pad_0 = const()[name = tensor("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_27_dilations_0 = const()[name = tensor("obj_27_dilations_0"), val = tensor([1, 1])]; tensor obj_27_groups_0 = const()[name = tensor("obj_27_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94814784)))]; tensor layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95994496)))]; tensor obj_27_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = obj_27_dilations_0, groups = obj_27_groups_0, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = obj_27_strides_0, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("obj_27_cast_fp16")]; tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; tensor var_6139_to_fp16 = const()[name = tensor("op_6139_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_6139_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95996096)))]; tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95997696)))]; tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor input_53_pad_type_0 = const()[name = tensor("input_53_pad_type_0"), val = tensor("valid")]; tensor input_53_strides_0 = const()[name = tensor("input_53_strides_0"), val = tensor([1, 1])]; tensor input_53_pad_0 = const()[name = tensor("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_53_dilations_0 = const()[name = tensor("input_53_dilations_0"), val = tensor([1, 1])]; tensor input_53_groups_0 = const()[name = tensor("input_53_groups_0"), val = tensor(1)]; tensor layers_6_fc1_weight_to_fp16 = const()[name = tensor("layers_6_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95999296)))]; tensor layers_6_fc1_bias_to_fp16 = const()[name = tensor("layers_6_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100717952)))]; tensor input_53_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = layers_6_fc1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("input_53_cast_fp16")]; tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("valid")]; tensor hidden_states_17_strides_0 = const()[name = tensor("hidden_states_17_strides_0"), val = tensor([1, 1])]; tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_17_dilations_0 = const()[name = tensor("hidden_states_17_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_17_groups_0 = const()[name = tensor("hidden_states_17_groups_0"), val = tensor(1)]; tensor layers_6_fc2_weight_to_fp16 = const()[name = tensor("layers_6_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100724160)))]; tensor layers_6_fc2_bias_to_fp16 = const()[name = tensor("layers_6_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105442816)))]; tensor hidden_states_17_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = hidden_states_17_dilations_0, groups = hidden_states_17_groups_0, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = hidden_states_17_strides_0, weight = layers_6_fc2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; tensor var_6171 = const()[name = tensor("op_6171"), val = tensor(3)]; tensor var_6187 = const()[name = tensor("op_6187"), val = tensor(1)]; tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; tensor var_6204_to_fp16 = const()[name = tensor("op_6204_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_6204_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105444416)))]; tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105446016)))]; tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("valid")]; tensor query_15_strides_0 = const()[name = tensor("query_15_strides_0"), val = tensor([1, 1])]; tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_15_dilations_0 = const()[name = tensor("query_15_dilations_0"), val = tensor([1, 1])]; tensor query_15_groups_0 = const()[name = tensor("query_15_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105447616)))]; tensor layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106627328)))]; tensor query_15_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = query_15_dilations_0, groups = query_15_groups_0, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = query_15_strides_0, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("query_15_cast_fp16")]; tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("valid")]; tensor key_15_strides_0 = const()[name = tensor("key_15_strides_0"), val = tensor([1, 1])]; tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_15_dilations_0 = const()[name = tensor("key_15_dilations_0"), val = tensor([1, 1])]; tensor key_15_groups_0 = const()[name = tensor("key_15_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106628928)))]; tensor key_15_cast_fp16 = conv(dilations = key_15_dilations_0, groups = key_15_groups_0, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = key_15_strides_0, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("key_15_cast_fp16")]; tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("valid")]; tensor value_15_strides_0 = const()[name = tensor("value_15_strides_0"), val = tensor([1, 1])]; tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_15_dilations_0 = const()[name = tensor("value_15_dilations_0"), val = tensor([1, 1])]; tensor value_15_groups_0 = const()[name = tensor("value_15_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107808640)))]; tensor layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108988352)))]; tensor value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = value_15_dilations_0, groups = value_15_groups_0, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = value_15_strides_0, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("value_15_cast_fp16")]; tensor var_6239_begin_0 = const()[name = tensor("op_6239_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6239_end_0 = const()[name = tensor("op_6239_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6239_end_mask_0 = const()[name = tensor("op_6239_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6239_cast_fp16 = slice_by_index(begin = var_6239_begin_0, end = var_6239_end_0, end_mask = var_6239_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6239_cast_fp16")]; tensor var_6243_begin_0 = const()[name = tensor("op_6243_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_6243_end_0 = const()[name = tensor("op_6243_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_6243_end_mask_0 = const()[name = tensor("op_6243_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6243_cast_fp16 = slice_by_index(begin = var_6243_begin_0, end = var_6243_end_0, end_mask = var_6243_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6243_cast_fp16")]; tensor var_6247_begin_0 = const()[name = tensor("op_6247_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_6247_end_0 = const()[name = tensor("op_6247_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_6247_end_mask_0 = const()[name = tensor("op_6247_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6247_cast_fp16 = slice_by_index(begin = var_6247_begin_0, end = var_6247_end_0, end_mask = var_6247_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6247_cast_fp16")]; tensor var_6251_begin_0 = const()[name = tensor("op_6251_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_6251_end_0 = const()[name = tensor("op_6251_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_6251_end_mask_0 = const()[name = tensor("op_6251_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6251_cast_fp16 = slice_by_index(begin = var_6251_begin_0, end = var_6251_end_0, end_mask = var_6251_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6251_cast_fp16")]; tensor var_6255_begin_0 = const()[name = tensor("op_6255_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_6255_end_0 = const()[name = tensor("op_6255_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_6255_end_mask_0 = const()[name = tensor("op_6255_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6255_cast_fp16 = slice_by_index(begin = var_6255_begin_0, end = var_6255_end_0, end_mask = var_6255_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6255_cast_fp16")]; tensor var_6259_begin_0 = const()[name = tensor("op_6259_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6259_end_0 = const()[name = tensor("op_6259_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_6259_end_mask_0 = const()[name = tensor("op_6259_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6259_cast_fp16 = slice_by_index(begin = var_6259_begin_0, end = var_6259_end_0, end_mask = var_6259_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6259_cast_fp16")]; tensor var_6263_begin_0 = const()[name = tensor("op_6263_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_6263_end_0 = const()[name = tensor("op_6263_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_6263_end_mask_0 = const()[name = tensor("op_6263_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6263_cast_fp16 = slice_by_index(begin = var_6263_begin_0, end = var_6263_end_0, end_mask = var_6263_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6263_cast_fp16")]; tensor var_6267_begin_0 = const()[name = tensor("op_6267_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_6267_end_0 = const()[name = tensor("op_6267_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_6267_end_mask_0 = const()[name = tensor("op_6267_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6267_cast_fp16 = slice_by_index(begin = var_6267_begin_0, end = var_6267_end_0, end_mask = var_6267_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6267_cast_fp16")]; tensor var_6271_begin_0 = const()[name = tensor("op_6271_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_6271_end_0 = const()[name = tensor("op_6271_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_6271_end_mask_0 = const()[name = tensor("op_6271_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6271_cast_fp16 = slice_by_index(begin = var_6271_begin_0, end = var_6271_end_0, end_mask = var_6271_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6271_cast_fp16")]; tensor var_6275_begin_0 = const()[name = tensor("op_6275_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_6275_end_0 = const()[name = tensor("op_6275_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_6275_end_mask_0 = const()[name = tensor("op_6275_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6275_cast_fp16 = slice_by_index(begin = var_6275_begin_0, end = var_6275_end_0, end_mask = var_6275_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6275_cast_fp16")]; tensor var_6279_begin_0 = const()[name = tensor("op_6279_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6279_end_0 = const()[name = tensor("op_6279_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_6279_end_mask_0 = const()[name = tensor("op_6279_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6279_cast_fp16 = slice_by_index(begin = var_6279_begin_0, end = var_6279_end_0, end_mask = var_6279_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6279_cast_fp16")]; tensor var_6283_begin_0 = const()[name = tensor("op_6283_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_6283_end_0 = const()[name = tensor("op_6283_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_6283_end_mask_0 = const()[name = tensor("op_6283_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6283_cast_fp16 = slice_by_index(begin = var_6283_begin_0, end = var_6283_end_0, end_mask = var_6283_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_6283_cast_fp16")]; tensor var_6286_begin_0 = const()[name = tensor("op_6286_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6286_end_0 = const()[name = tensor("op_6286_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6286_end_mask_0 = const()[name = tensor("op_6286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6286_cast_fp16 = slice_by_index(begin = var_6286_begin_0, end = var_6286_end_0, end_mask = var_6286_end_mask_0, x = var_6239_cast_fp16)[name = tensor("op_6286_cast_fp16")]; tensor var_6287_begin_0 = const()[name = tensor("op_6287_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6287_end_0 = const()[name = tensor("op_6287_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6287_end_mask_0 = const()[name = tensor("op_6287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6287_cast_fp16 = slice_by_index(begin = var_6287_begin_0, end = var_6287_end_0, end_mask = var_6287_end_mask_0, x = var_6239_cast_fp16)[name = tensor("op_6287_cast_fp16")]; tensor var_6288_begin_0 = const()[name = tensor("op_6288_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6288_end_0 = const()[name = tensor("op_6288_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6288_end_mask_0 = const()[name = tensor("op_6288_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6288_cast_fp16 = slice_by_index(begin = var_6288_begin_0, end = var_6288_end_0, end_mask = var_6288_end_mask_0, x = var_6239_cast_fp16)[name = tensor("op_6288_cast_fp16")]; tensor var_6289_begin_0 = const()[name = tensor("op_6289_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6289_end_0 = const()[name = tensor("op_6289_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6289_end_mask_0 = const()[name = tensor("op_6289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6289_cast_fp16 = slice_by_index(begin = var_6289_begin_0, end = var_6289_end_0, end_mask = var_6289_end_mask_0, x = var_6239_cast_fp16)[name = tensor("op_6289_cast_fp16")]; tensor var_6290_begin_0 = const()[name = tensor("op_6290_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6290_end_0 = const()[name = tensor("op_6290_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6290_end_mask_0 = const()[name = tensor("op_6290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6290_cast_fp16 = slice_by_index(begin = var_6290_begin_0, end = var_6290_end_0, end_mask = var_6290_end_mask_0, x = var_6239_cast_fp16)[name = tensor("op_6290_cast_fp16")]; tensor var_6291_begin_0 = const()[name = tensor("op_6291_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6291_end_0 = const()[name = tensor("op_6291_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6291_end_mask_0 = const()[name = tensor("op_6291_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6291_cast_fp16 = slice_by_index(begin = var_6291_begin_0, end = var_6291_end_0, end_mask = var_6291_end_mask_0, x = var_6239_cast_fp16)[name = tensor("op_6291_cast_fp16")]; tensor var_6292_begin_0 = const()[name = tensor("op_6292_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6292_end_0 = const()[name = tensor("op_6292_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6292_end_mask_0 = const()[name = tensor("op_6292_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6292_cast_fp16 = slice_by_index(begin = var_6292_begin_0, end = var_6292_end_0, end_mask = var_6292_end_mask_0, x = var_6243_cast_fp16)[name = tensor("op_6292_cast_fp16")]; tensor var_6293_begin_0 = const()[name = tensor("op_6293_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6293_end_0 = const()[name = tensor("op_6293_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6293_end_mask_0 = const()[name = tensor("op_6293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6293_cast_fp16 = slice_by_index(begin = var_6293_begin_0, end = var_6293_end_0, end_mask = var_6293_end_mask_0, x = var_6243_cast_fp16)[name = tensor("op_6293_cast_fp16")]; tensor var_6294_begin_0 = const()[name = tensor("op_6294_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6294_end_0 = const()[name = tensor("op_6294_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6294_end_mask_0 = const()[name = tensor("op_6294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6294_cast_fp16 = slice_by_index(begin = var_6294_begin_0, end = var_6294_end_0, end_mask = var_6294_end_mask_0, x = var_6243_cast_fp16)[name = tensor("op_6294_cast_fp16")]; tensor var_6295_begin_0 = const()[name = tensor("op_6295_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6295_end_0 = const()[name = tensor("op_6295_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6295_end_mask_0 = const()[name = tensor("op_6295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6295_cast_fp16 = slice_by_index(begin = var_6295_begin_0, end = var_6295_end_0, end_mask = var_6295_end_mask_0, x = var_6243_cast_fp16)[name = tensor("op_6295_cast_fp16")]; tensor var_6296_begin_0 = const()[name = tensor("op_6296_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6296_end_0 = const()[name = tensor("op_6296_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6296_end_mask_0 = const()[name = tensor("op_6296_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6296_cast_fp16 = slice_by_index(begin = var_6296_begin_0, end = var_6296_end_0, end_mask = var_6296_end_mask_0, x = var_6243_cast_fp16)[name = tensor("op_6296_cast_fp16")]; tensor var_6297_begin_0 = const()[name = tensor("op_6297_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6297_end_0 = const()[name = tensor("op_6297_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6297_end_mask_0 = const()[name = tensor("op_6297_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6297_cast_fp16 = slice_by_index(begin = var_6297_begin_0, end = var_6297_end_0, end_mask = var_6297_end_mask_0, x = var_6243_cast_fp16)[name = tensor("op_6297_cast_fp16")]; tensor var_6298_begin_0 = const()[name = tensor("op_6298_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6298_end_0 = const()[name = tensor("op_6298_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6298_end_mask_0 = const()[name = tensor("op_6298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6298_cast_fp16 = slice_by_index(begin = var_6298_begin_0, end = var_6298_end_0, end_mask = var_6298_end_mask_0, x = var_6247_cast_fp16)[name = tensor("op_6298_cast_fp16")]; tensor var_6299_begin_0 = const()[name = tensor("op_6299_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6299_end_0 = const()[name = tensor("op_6299_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6299_end_mask_0 = const()[name = tensor("op_6299_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6299_cast_fp16 = slice_by_index(begin = var_6299_begin_0, end = var_6299_end_0, end_mask = var_6299_end_mask_0, x = var_6247_cast_fp16)[name = tensor("op_6299_cast_fp16")]; tensor var_6300_begin_0 = const()[name = tensor("op_6300_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6300_end_0 = const()[name = tensor("op_6300_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6300_end_mask_0 = const()[name = tensor("op_6300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6300_cast_fp16 = slice_by_index(begin = var_6300_begin_0, end = var_6300_end_0, end_mask = var_6300_end_mask_0, x = var_6247_cast_fp16)[name = tensor("op_6300_cast_fp16")]; tensor var_6301_begin_0 = const()[name = tensor("op_6301_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6301_end_0 = const()[name = tensor("op_6301_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6301_end_mask_0 = const()[name = tensor("op_6301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6301_cast_fp16 = slice_by_index(begin = var_6301_begin_0, end = var_6301_end_0, end_mask = var_6301_end_mask_0, x = var_6247_cast_fp16)[name = tensor("op_6301_cast_fp16")]; tensor var_6302_begin_0 = const()[name = tensor("op_6302_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6302_end_0 = const()[name = tensor("op_6302_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6302_end_mask_0 = const()[name = tensor("op_6302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6302_cast_fp16 = slice_by_index(begin = var_6302_begin_0, end = var_6302_end_0, end_mask = var_6302_end_mask_0, x = var_6247_cast_fp16)[name = tensor("op_6302_cast_fp16")]; tensor var_6303_begin_0 = const()[name = tensor("op_6303_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6303_end_0 = const()[name = tensor("op_6303_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6303_end_mask_0 = const()[name = tensor("op_6303_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6303_cast_fp16 = slice_by_index(begin = var_6303_begin_0, end = var_6303_end_0, end_mask = var_6303_end_mask_0, x = var_6247_cast_fp16)[name = tensor("op_6303_cast_fp16")]; tensor var_6304_begin_0 = const()[name = tensor("op_6304_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6304_end_0 = const()[name = tensor("op_6304_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6304_end_mask_0 = const()[name = tensor("op_6304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6304_cast_fp16 = slice_by_index(begin = var_6304_begin_0, end = var_6304_end_0, end_mask = var_6304_end_mask_0, x = var_6251_cast_fp16)[name = tensor("op_6304_cast_fp16")]; tensor var_6305_begin_0 = const()[name = tensor("op_6305_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6305_end_0 = const()[name = tensor("op_6305_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6305_end_mask_0 = const()[name = tensor("op_6305_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6305_cast_fp16 = slice_by_index(begin = var_6305_begin_0, end = var_6305_end_0, end_mask = var_6305_end_mask_0, x = var_6251_cast_fp16)[name = tensor("op_6305_cast_fp16")]; tensor var_6306_begin_0 = const()[name = tensor("op_6306_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6306_end_0 = const()[name = tensor("op_6306_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6306_end_mask_0 = const()[name = tensor("op_6306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6306_cast_fp16 = slice_by_index(begin = var_6306_begin_0, end = var_6306_end_0, end_mask = var_6306_end_mask_0, x = var_6251_cast_fp16)[name = tensor("op_6306_cast_fp16")]; tensor var_6307_begin_0 = const()[name = tensor("op_6307_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6307_end_0 = const()[name = tensor("op_6307_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6307_end_mask_0 = const()[name = tensor("op_6307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6307_cast_fp16 = slice_by_index(begin = var_6307_begin_0, end = var_6307_end_0, end_mask = var_6307_end_mask_0, x = var_6251_cast_fp16)[name = tensor("op_6307_cast_fp16")]; tensor var_6308_begin_0 = const()[name = tensor("op_6308_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6308_end_0 = const()[name = tensor("op_6308_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6308_end_mask_0 = const()[name = tensor("op_6308_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6308_cast_fp16 = slice_by_index(begin = var_6308_begin_0, end = var_6308_end_0, end_mask = var_6308_end_mask_0, x = var_6251_cast_fp16)[name = tensor("op_6308_cast_fp16")]; tensor var_6309_begin_0 = const()[name = tensor("op_6309_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6309_end_0 = const()[name = tensor("op_6309_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6309_end_mask_0 = const()[name = tensor("op_6309_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6309_cast_fp16 = slice_by_index(begin = var_6309_begin_0, end = var_6309_end_0, end_mask = var_6309_end_mask_0, x = var_6251_cast_fp16)[name = tensor("op_6309_cast_fp16")]; tensor var_6310_begin_0 = const()[name = tensor("op_6310_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6310_end_0 = const()[name = tensor("op_6310_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6310_end_mask_0 = const()[name = tensor("op_6310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6310_cast_fp16 = slice_by_index(begin = var_6310_begin_0, end = var_6310_end_0, end_mask = var_6310_end_mask_0, x = var_6255_cast_fp16)[name = tensor("op_6310_cast_fp16")]; tensor var_6311_begin_0 = const()[name = tensor("op_6311_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6311_end_0 = const()[name = tensor("op_6311_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6311_end_mask_0 = const()[name = tensor("op_6311_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6311_cast_fp16 = slice_by_index(begin = var_6311_begin_0, end = var_6311_end_0, end_mask = var_6311_end_mask_0, x = var_6255_cast_fp16)[name = tensor("op_6311_cast_fp16")]; tensor var_6312_begin_0 = const()[name = tensor("op_6312_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6312_end_0 = const()[name = tensor("op_6312_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6312_end_mask_0 = const()[name = tensor("op_6312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6312_cast_fp16 = slice_by_index(begin = var_6312_begin_0, end = var_6312_end_0, end_mask = var_6312_end_mask_0, x = var_6255_cast_fp16)[name = tensor("op_6312_cast_fp16")]; tensor var_6313_begin_0 = const()[name = tensor("op_6313_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6313_end_0 = const()[name = tensor("op_6313_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6313_end_mask_0 = const()[name = tensor("op_6313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6313_cast_fp16 = slice_by_index(begin = var_6313_begin_0, end = var_6313_end_0, end_mask = var_6313_end_mask_0, x = var_6255_cast_fp16)[name = tensor("op_6313_cast_fp16")]; tensor var_6314_begin_0 = const()[name = tensor("op_6314_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6314_end_0 = const()[name = tensor("op_6314_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6314_end_mask_0 = const()[name = tensor("op_6314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6314_cast_fp16 = slice_by_index(begin = var_6314_begin_0, end = var_6314_end_0, end_mask = var_6314_end_mask_0, x = var_6255_cast_fp16)[name = tensor("op_6314_cast_fp16")]; tensor var_6315_begin_0 = const()[name = tensor("op_6315_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6315_end_0 = const()[name = tensor("op_6315_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6315_end_mask_0 = const()[name = tensor("op_6315_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6315_cast_fp16 = slice_by_index(begin = var_6315_begin_0, end = var_6315_end_0, end_mask = var_6315_end_mask_0, x = var_6255_cast_fp16)[name = tensor("op_6315_cast_fp16")]; tensor var_6316_begin_0 = const()[name = tensor("op_6316_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6316_end_0 = const()[name = tensor("op_6316_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6316_end_mask_0 = const()[name = tensor("op_6316_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6316_cast_fp16 = slice_by_index(begin = var_6316_begin_0, end = var_6316_end_0, end_mask = var_6316_end_mask_0, x = var_6259_cast_fp16)[name = tensor("op_6316_cast_fp16")]; tensor var_6317_begin_0 = const()[name = tensor("op_6317_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6317_end_0 = const()[name = tensor("op_6317_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6317_end_mask_0 = const()[name = tensor("op_6317_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6317_cast_fp16 = slice_by_index(begin = var_6317_begin_0, end = var_6317_end_0, end_mask = var_6317_end_mask_0, x = var_6259_cast_fp16)[name = tensor("op_6317_cast_fp16")]; tensor var_6318_begin_0 = const()[name = tensor("op_6318_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6318_end_0 = const()[name = tensor("op_6318_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6318_end_mask_0 = const()[name = tensor("op_6318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6318_cast_fp16 = slice_by_index(begin = var_6318_begin_0, end = var_6318_end_0, end_mask = var_6318_end_mask_0, x = var_6259_cast_fp16)[name = tensor("op_6318_cast_fp16")]; tensor var_6319_begin_0 = const()[name = tensor("op_6319_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6319_end_0 = const()[name = tensor("op_6319_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6319_end_mask_0 = const()[name = tensor("op_6319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6319_cast_fp16 = slice_by_index(begin = var_6319_begin_0, end = var_6319_end_0, end_mask = var_6319_end_mask_0, x = var_6259_cast_fp16)[name = tensor("op_6319_cast_fp16")]; tensor var_6320_begin_0 = const()[name = tensor("op_6320_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6320_end_0 = const()[name = tensor("op_6320_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6320_end_mask_0 = const()[name = tensor("op_6320_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6320_cast_fp16 = slice_by_index(begin = var_6320_begin_0, end = var_6320_end_0, end_mask = var_6320_end_mask_0, x = var_6259_cast_fp16)[name = tensor("op_6320_cast_fp16")]; tensor var_6321_begin_0 = const()[name = tensor("op_6321_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6321_end_0 = const()[name = tensor("op_6321_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6321_end_mask_0 = const()[name = tensor("op_6321_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6321_cast_fp16 = slice_by_index(begin = var_6321_begin_0, end = var_6321_end_0, end_mask = var_6321_end_mask_0, x = var_6259_cast_fp16)[name = tensor("op_6321_cast_fp16")]; tensor var_6322_begin_0 = const()[name = tensor("op_6322_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6322_end_0 = const()[name = tensor("op_6322_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6322_end_mask_0 = const()[name = tensor("op_6322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6322_cast_fp16 = slice_by_index(begin = var_6322_begin_0, end = var_6322_end_0, end_mask = var_6322_end_mask_0, x = var_6263_cast_fp16)[name = tensor("op_6322_cast_fp16")]; tensor var_6323_begin_0 = const()[name = tensor("op_6323_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6323_end_0 = const()[name = tensor("op_6323_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6323_end_mask_0 = const()[name = tensor("op_6323_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6323_cast_fp16 = slice_by_index(begin = var_6323_begin_0, end = var_6323_end_0, end_mask = var_6323_end_mask_0, x = var_6263_cast_fp16)[name = tensor("op_6323_cast_fp16")]; tensor var_6324_begin_0 = const()[name = tensor("op_6324_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6324_end_0 = const()[name = tensor("op_6324_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6324_end_mask_0 = const()[name = tensor("op_6324_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6324_cast_fp16 = slice_by_index(begin = var_6324_begin_0, end = var_6324_end_0, end_mask = var_6324_end_mask_0, x = var_6263_cast_fp16)[name = tensor("op_6324_cast_fp16")]; tensor var_6325_begin_0 = const()[name = tensor("op_6325_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6325_end_0 = const()[name = tensor("op_6325_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6325_end_mask_0 = const()[name = tensor("op_6325_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6325_cast_fp16 = slice_by_index(begin = var_6325_begin_0, end = var_6325_end_0, end_mask = var_6325_end_mask_0, x = var_6263_cast_fp16)[name = tensor("op_6325_cast_fp16")]; tensor var_6326_begin_0 = const()[name = tensor("op_6326_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6326_end_0 = const()[name = tensor("op_6326_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6326_end_mask_0 = const()[name = tensor("op_6326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6326_cast_fp16 = slice_by_index(begin = var_6326_begin_0, end = var_6326_end_0, end_mask = var_6326_end_mask_0, x = var_6263_cast_fp16)[name = tensor("op_6326_cast_fp16")]; tensor var_6327_begin_0 = const()[name = tensor("op_6327_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6327_end_0 = const()[name = tensor("op_6327_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6327_end_mask_0 = const()[name = tensor("op_6327_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6327_cast_fp16 = slice_by_index(begin = var_6327_begin_0, end = var_6327_end_0, end_mask = var_6327_end_mask_0, x = var_6263_cast_fp16)[name = tensor("op_6327_cast_fp16")]; tensor var_6328_begin_0 = const()[name = tensor("op_6328_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6328_end_0 = const()[name = tensor("op_6328_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6328_end_mask_0 = const()[name = tensor("op_6328_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6328_cast_fp16 = slice_by_index(begin = var_6328_begin_0, end = var_6328_end_0, end_mask = var_6328_end_mask_0, x = var_6267_cast_fp16)[name = tensor("op_6328_cast_fp16")]; tensor var_6329_begin_0 = const()[name = tensor("op_6329_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6329_end_0 = const()[name = tensor("op_6329_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6329_end_mask_0 = const()[name = tensor("op_6329_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6329_cast_fp16 = slice_by_index(begin = var_6329_begin_0, end = var_6329_end_0, end_mask = var_6329_end_mask_0, x = var_6267_cast_fp16)[name = tensor("op_6329_cast_fp16")]; tensor var_6330_begin_0 = const()[name = tensor("op_6330_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6330_end_0 = const()[name = tensor("op_6330_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6330_end_mask_0 = const()[name = tensor("op_6330_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6330_cast_fp16 = slice_by_index(begin = var_6330_begin_0, end = var_6330_end_0, end_mask = var_6330_end_mask_0, x = var_6267_cast_fp16)[name = tensor("op_6330_cast_fp16")]; tensor var_6331_begin_0 = const()[name = tensor("op_6331_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6331_end_0 = const()[name = tensor("op_6331_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6331_end_mask_0 = const()[name = tensor("op_6331_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6331_cast_fp16 = slice_by_index(begin = var_6331_begin_0, end = var_6331_end_0, end_mask = var_6331_end_mask_0, x = var_6267_cast_fp16)[name = tensor("op_6331_cast_fp16")]; tensor var_6332_begin_0 = const()[name = tensor("op_6332_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6332_end_0 = const()[name = tensor("op_6332_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6332_end_mask_0 = const()[name = tensor("op_6332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6332_cast_fp16 = slice_by_index(begin = var_6332_begin_0, end = var_6332_end_0, end_mask = var_6332_end_mask_0, x = var_6267_cast_fp16)[name = tensor("op_6332_cast_fp16")]; tensor var_6333_begin_0 = const()[name = tensor("op_6333_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6333_end_0 = const()[name = tensor("op_6333_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6333_end_mask_0 = const()[name = tensor("op_6333_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6333_cast_fp16 = slice_by_index(begin = var_6333_begin_0, end = var_6333_end_0, end_mask = var_6333_end_mask_0, x = var_6267_cast_fp16)[name = tensor("op_6333_cast_fp16")]; tensor var_6334_begin_0 = const()[name = tensor("op_6334_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6334_end_0 = const()[name = tensor("op_6334_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6334_end_mask_0 = const()[name = tensor("op_6334_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6334_cast_fp16 = slice_by_index(begin = var_6334_begin_0, end = var_6334_end_0, end_mask = var_6334_end_mask_0, x = var_6271_cast_fp16)[name = tensor("op_6334_cast_fp16")]; tensor var_6335_begin_0 = const()[name = tensor("op_6335_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6335_end_0 = const()[name = tensor("op_6335_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6335_end_mask_0 = const()[name = tensor("op_6335_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6335_cast_fp16 = slice_by_index(begin = var_6335_begin_0, end = var_6335_end_0, end_mask = var_6335_end_mask_0, x = var_6271_cast_fp16)[name = tensor("op_6335_cast_fp16")]; tensor var_6336_begin_0 = const()[name = tensor("op_6336_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6336_end_0 = const()[name = tensor("op_6336_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6336_end_mask_0 = const()[name = tensor("op_6336_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6336_cast_fp16 = slice_by_index(begin = var_6336_begin_0, end = var_6336_end_0, end_mask = var_6336_end_mask_0, x = var_6271_cast_fp16)[name = tensor("op_6336_cast_fp16")]; tensor var_6337_begin_0 = const()[name = tensor("op_6337_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6337_end_0 = const()[name = tensor("op_6337_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6337_end_mask_0 = const()[name = tensor("op_6337_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6337_cast_fp16 = slice_by_index(begin = var_6337_begin_0, end = var_6337_end_0, end_mask = var_6337_end_mask_0, x = var_6271_cast_fp16)[name = tensor("op_6337_cast_fp16")]; tensor var_6338_begin_0 = const()[name = tensor("op_6338_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6338_end_0 = const()[name = tensor("op_6338_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6338_end_mask_0 = const()[name = tensor("op_6338_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6338_cast_fp16 = slice_by_index(begin = var_6338_begin_0, end = var_6338_end_0, end_mask = var_6338_end_mask_0, x = var_6271_cast_fp16)[name = tensor("op_6338_cast_fp16")]; tensor var_6339_begin_0 = const()[name = tensor("op_6339_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6339_end_0 = const()[name = tensor("op_6339_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6339_end_mask_0 = const()[name = tensor("op_6339_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6339_cast_fp16 = slice_by_index(begin = var_6339_begin_0, end = var_6339_end_0, end_mask = var_6339_end_mask_0, x = var_6271_cast_fp16)[name = tensor("op_6339_cast_fp16")]; tensor var_6340_begin_0 = const()[name = tensor("op_6340_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6340_end_0 = const()[name = tensor("op_6340_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6340_end_mask_0 = const()[name = tensor("op_6340_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6340_cast_fp16 = slice_by_index(begin = var_6340_begin_0, end = var_6340_end_0, end_mask = var_6340_end_mask_0, x = var_6275_cast_fp16)[name = tensor("op_6340_cast_fp16")]; tensor var_6341_begin_0 = const()[name = tensor("op_6341_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6341_end_0 = const()[name = tensor("op_6341_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6341_end_mask_0 = const()[name = tensor("op_6341_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6341_cast_fp16 = slice_by_index(begin = var_6341_begin_0, end = var_6341_end_0, end_mask = var_6341_end_mask_0, x = var_6275_cast_fp16)[name = tensor("op_6341_cast_fp16")]; tensor var_6342_begin_0 = const()[name = tensor("op_6342_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6342_end_0 = const()[name = tensor("op_6342_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6342_end_mask_0 = const()[name = tensor("op_6342_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6342_cast_fp16 = slice_by_index(begin = var_6342_begin_0, end = var_6342_end_0, end_mask = var_6342_end_mask_0, x = var_6275_cast_fp16)[name = tensor("op_6342_cast_fp16")]; tensor var_6343_begin_0 = const()[name = tensor("op_6343_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6343_end_0 = const()[name = tensor("op_6343_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6343_end_mask_0 = const()[name = tensor("op_6343_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6343_cast_fp16 = slice_by_index(begin = var_6343_begin_0, end = var_6343_end_0, end_mask = var_6343_end_mask_0, x = var_6275_cast_fp16)[name = tensor("op_6343_cast_fp16")]; tensor var_6344_begin_0 = const()[name = tensor("op_6344_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6344_end_0 = const()[name = tensor("op_6344_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6344_end_mask_0 = const()[name = tensor("op_6344_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6344_cast_fp16 = slice_by_index(begin = var_6344_begin_0, end = var_6344_end_0, end_mask = var_6344_end_mask_0, x = var_6275_cast_fp16)[name = tensor("op_6344_cast_fp16")]; tensor var_6345_begin_0 = const()[name = tensor("op_6345_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6345_end_0 = const()[name = tensor("op_6345_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6345_end_mask_0 = const()[name = tensor("op_6345_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6345_cast_fp16 = slice_by_index(begin = var_6345_begin_0, end = var_6345_end_0, end_mask = var_6345_end_mask_0, x = var_6275_cast_fp16)[name = tensor("op_6345_cast_fp16")]; tensor var_6346_begin_0 = const()[name = tensor("op_6346_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6346_end_0 = const()[name = tensor("op_6346_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6346_end_mask_0 = const()[name = tensor("op_6346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6346_cast_fp16 = slice_by_index(begin = var_6346_begin_0, end = var_6346_end_0, end_mask = var_6346_end_mask_0, x = var_6279_cast_fp16)[name = tensor("op_6346_cast_fp16")]; tensor var_6347_begin_0 = const()[name = tensor("op_6347_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6347_end_0 = const()[name = tensor("op_6347_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6347_end_mask_0 = const()[name = tensor("op_6347_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6347_cast_fp16 = slice_by_index(begin = var_6347_begin_0, end = var_6347_end_0, end_mask = var_6347_end_mask_0, x = var_6279_cast_fp16)[name = tensor("op_6347_cast_fp16")]; tensor var_6348_begin_0 = const()[name = tensor("op_6348_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6348_end_0 = const()[name = tensor("op_6348_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6348_end_mask_0 = const()[name = tensor("op_6348_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6348_cast_fp16 = slice_by_index(begin = var_6348_begin_0, end = var_6348_end_0, end_mask = var_6348_end_mask_0, x = var_6279_cast_fp16)[name = tensor("op_6348_cast_fp16")]; tensor var_6349_begin_0 = const()[name = tensor("op_6349_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6349_end_0 = const()[name = tensor("op_6349_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6349_end_mask_0 = const()[name = tensor("op_6349_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6349_cast_fp16 = slice_by_index(begin = var_6349_begin_0, end = var_6349_end_0, end_mask = var_6349_end_mask_0, x = var_6279_cast_fp16)[name = tensor("op_6349_cast_fp16")]; tensor var_6350_begin_0 = const()[name = tensor("op_6350_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6350_end_0 = const()[name = tensor("op_6350_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6350_end_mask_0 = const()[name = tensor("op_6350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6350_cast_fp16 = slice_by_index(begin = var_6350_begin_0, end = var_6350_end_0, end_mask = var_6350_end_mask_0, x = var_6279_cast_fp16)[name = tensor("op_6350_cast_fp16")]; tensor var_6351_begin_0 = const()[name = tensor("op_6351_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6351_end_0 = const()[name = tensor("op_6351_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6351_end_mask_0 = const()[name = tensor("op_6351_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6351_cast_fp16 = slice_by_index(begin = var_6351_begin_0, end = var_6351_end_0, end_mask = var_6351_end_mask_0, x = var_6279_cast_fp16)[name = tensor("op_6351_cast_fp16")]; tensor var_6352_begin_0 = const()[name = tensor("op_6352_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6352_end_0 = const()[name = tensor("op_6352_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_6352_end_mask_0 = const()[name = tensor("op_6352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6352_cast_fp16 = slice_by_index(begin = var_6352_begin_0, end = var_6352_end_0, end_mask = var_6352_end_mask_0, x = var_6283_cast_fp16)[name = tensor("op_6352_cast_fp16")]; tensor var_6353_begin_0 = const()[name = tensor("op_6353_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6353_end_0 = const()[name = tensor("op_6353_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_6353_end_mask_0 = const()[name = tensor("op_6353_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6353_cast_fp16 = slice_by_index(begin = var_6353_begin_0, end = var_6353_end_0, end_mask = var_6353_end_mask_0, x = var_6283_cast_fp16)[name = tensor("op_6353_cast_fp16")]; tensor var_6354_begin_0 = const()[name = tensor("op_6354_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6354_end_0 = const()[name = tensor("op_6354_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_6354_end_mask_0 = const()[name = tensor("op_6354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6354_cast_fp16 = slice_by_index(begin = var_6354_begin_0, end = var_6354_end_0, end_mask = var_6354_end_mask_0, x = var_6283_cast_fp16)[name = tensor("op_6354_cast_fp16")]; tensor var_6355_begin_0 = const()[name = tensor("op_6355_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_6355_end_0 = const()[name = tensor("op_6355_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_6355_end_mask_0 = const()[name = tensor("op_6355_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6355_cast_fp16 = slice_by_index(begin = var_6355_begin_0, end = var_6355_end_0, end_mask = var_6355_end_mask_0, x = var_6283_cast_fp16)[name = tensor("op_6355_cast_fp16")]; tensor var_6356_begin_0 = const()[name = tensor("op_6356_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_6356_end_0 = const()[name = tensor("op_6356_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_6356_end_mask_0 = const()[name = tensor("op_6356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6356_cast_fp16 = slice_by_index(begin = var_6356_begin_0, end = var_6356_end_0, end_mask = var_6356_end_mask_0, x = var_6283_cast_fp16)[name = tensor("op_6356_cast_fp16")]; tensor var_6357_begin_0 = const()[name = tensor("op_6357_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_6357_end_0 = const()[name = tensor("op_6357_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_6357_end_mask_0 = const()[name = tensor("op_6357_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6357_cast_fp16 = slice_by_index(begin = var_6357_begin_0, end = var_6357_end_0, end_mask = var_6357_end_mask_0, x = var_6283_cast_fp16)[name = tensor("op_6357_cast_fp16")]; tensor k_15_perm_0 = const()[name = tensor("k_15_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_6362_begin_0 = const()[name = tensor("op_6362_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6362_end_0 = const()[name = tensor("op_6362_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_6362_end_mask_0 = const()[name = tensor("op_6362_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = key_15_cast_fp16)[name = tensor("transpose_4")]; tensor var_6362_cast_fp16 = slice_by_index(begin = var_6362_begin_0, end = var_6362_end_0, end_mask = var_6362_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6362_cast_fp16")]; tensor var_6366_begin_0 = const()[name = tensor("op_6366_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_6366_end_0 = const()[name = tensor("op_6366_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_6366_end_mask_0 = const()[name = tensor("op_6366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6366_cast_fp16 = slice_by_index(begin = var_6366_begin_0, end = var_6366_end_0, end_mask = var_6366_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6366_cast_fp16")]; tensor var_6370_begin_0 = const()[name = tensor("op_6370_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_6370_end_0 = const()[name = tensor("op_6370_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_6370_end_mask_0 = const()[name = tensor("op_6370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6370_cast_fp16 = slice_by_index(begin = var_6370_begin_0, end = var_6370_end_0, end_mask = var_6370_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6370_cast_fp16")]; tensor var_6374_begin_0 = const()[name = tensor("op_6374_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_6374_end_0 = const()[name = tensor("op_6374_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_6374_end_mask_0 = const()[name = tensor("op_6374_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6374_cast_fp16 = slice_by_index(begin = var_6374_begin_0, end = var_6374_end_0, end_mask = var_6374_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6374_cast_fp16")]; tensor var_6378_begin_0 = const()[name = tensor("op_6378_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_6378_end_0 = const()[name = tensor("op_6378_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_6378_end_mask_0 = const()[name = tensor("op_6378_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6378_cast_fp16 = slice_by_index(begin = var_6378_begin_0, end = var_6378_end_0, end_mask = var_6378_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6378_cast_fp16")]; tensor var_6382_begin_0 = const()[name = tensor("op_6382_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_6382_end_0 = const()[name = tensor("op_6382_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_6382_end_mask_0 = const()[name = tensor("op_6382_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6382_cast_fp16 = slice_by_index(begin = var_6382_begin_0, end = var_6382_end_0, end_mask = var_6382_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6382_cast_fp16")]; tensor var_6386_begin_0 = const()[name = tensor("op_6386_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_6386_end_0 = const()[name = tensor("op_6386_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_6386_end_mask_0 = const()[name = tensor("op_6386_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = var_6386_end_0, end_mask = var_6386_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6386_cast_fp16")]; tensor var_6390_begin_0 = const()[name = tensor("op_6390_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_6390_end_0 = const()[name = tensor("op_6390_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_6390_end_mask_0 = const()[name = tensor("op_6390_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6390_cast_fp16 = slice_by_index(begin = var_6390_begin_0, end = var_6390_end_0, end_mask = var_6390_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6390_cast_fp16")]; tensor var_6394_begin_0 = const()[name = tensor("op_6394_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_6394_end_0 = const()[name = tensor("op_6394_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_6394_end_mask_0 = const()[name = tensor("op_6394_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6394_cast_fp16 = slice_by_index(begin = var_6394_begin_0, end = var_6394_end_0, end_mask = var_6394_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6394_cast_fp16")]; tensor var_6398_begin_0 = const()[name = tensor("op_6398_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_6398_end_0 = const()[name = tensor("op_6398_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_6398_end_mask_0 = const()[name = tensor("op_6398_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6398_cast_fp16 = slice_by_index(begin = var_6398_begin_0, end = var_6398_end_0, end_mask = var_6398_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6398_cast_fp16")]; tensor var_6402_begin_0 = const()[name = tensor("op_6402_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_6402_end_0 = const()[name = tensor("op_6402_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_6402_end_mask_0 = const()[name = tensor("op_6402_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6402_cast_fp16 = slice_by_index(begin = var_6402_begin_0, end = var_6402_end_0, end_mask = var_6402_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6402_cast_fp16")]; tensor var_6406_begin_0 = const()[name = tensor("op_6406_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_6406_end_0 = const()[name = tensor("op_6406_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_6406_end_mask_0 = const()[name = tensor("op_6406_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6406_cast_fp16 = slice_by_index(begin = var_6406_begin_0, end = var_6406_end_0, end_mask = var_6406_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_6406_cast_fp16")]; tensor var_6408_begin_0 = const()[name = tensor("op_6408_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6408_end_0 = const()[name = tensor("op_6408_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6408_end_mask_0 = const()[name = tensor("op_6408_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6408_cast_fp16 = slice_by_index(begin = var_6408_begin_0, end = var_6408_end_0, end_mask = var_6408_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6408_cast_fp16")]; tensor var_6412_begin_0 = const()[name = tensor("op_6412_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_6412_end_0 = const()[name = tensor("op_6412_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_6412_end_mask_0 = const()[name = tensor("op_6412_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6412_cast_fp16 = slice_by_index(begin = var_6412_begin_0, end = var_6412_end_0, end_mask = var_6412_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6412_cast_fp16")]; tensor var_6416_begin_0 = const()[name = tensor("op_6416_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_6416_end_0 = const()[name = tensor("op_6416_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_6416_end_mask_0 = const()[name = tensor("op_6416_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6416_cast_fp16 = slice_by_index(begin = var_6416_begin_0, end = var_6416_end_0, end_mask = var_6416_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6416_cast_fp16")]; tensor var_6420_begin_0 = const()[name = tensor("op_6420_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_6420_end_0 = const()[name = tensor("op_6420_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_6420_end_mask_0 = const()[name = tensor("op_6420_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6420_cast_fp16 = slice_by_index(begin = var_6420_begin_0, end = var_6420_end_0, end_mask = var_6420_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6420_cast_fp16")]; tensor var_6424_begin_0 = const()[name = tensor("op_6424_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_6424_end_0 = const()[name = tensor("op_6424_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_6424_end_mask_0 = const()[name = tensor("op_6424_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6424_cast_fp16 = slice_by_index(begin = var_6424_begin_0, end = var_6424_end_0, end_mask = var_6424_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6424_cast_fp16")]; tensor var_6428_begin_0 = const()[name = tensor("op_6428_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6428_end_0 = const()[name = tensor("op_6428_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_6428_end_mask_0 = const()[name = tensor("op_6428_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6428_cast_fp16 = slice_by_index(begin = var_6428_begin_0, end = var_6428_end_0, end_mask = var_6428_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6428_cast_fp16")]; tensor var_6432_begin_0 = const()[name = tensor("op_6432_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_6432_end_0 = const()[name = tensor("op_6432_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_6432_end_mask_0 = const()[name = tensor("op_6432_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6432_cast_fp16 = slice_by_index(begin = var_6432_begin_0, end = var_6432_end_0, end_mask = var_6432_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6432_cast_fp16")]; tensor var_6436_begin_0 = const()[name = tensor("op_6436_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_6436_end_0 = const()[name = tensor("op_6436_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_6436_end_mask_0 = const()[name = tensor("op_6436_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6436_cast_fp16 = slice_by_index(begin = var_6436_begin_0, end = var_6436_end_0, end_mask = var_6436_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6436_cast_fp16")]; tensor var_6440_begin_0 = const()[name = tensor("op_6440_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_6440_end_0 = const()[name = tensor("op_6440_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_6440_end_mask_0 = const()[name = tensor("op_6440_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6440_cast_fp16 = slice_by_index(begin = var_6440_begin_0, end = var_6440_end_0, end_mask = var_6440_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6440_cast_fp16")]; tensor var_6444_begin_0 = const()[name = tensor("op_6444_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_6444_end_0 = const()[name = tensor("op_6444_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_6444_end_mask_0 = const()[name = tensor("op_6444_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6444_cast_fp16 = slice_by_index(begin = var_6444_begin_0, end = var_6444_end_0, end_mask = var_6444_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6444_cast_fp16")]; tensor var_6448_begin_0 = const()[name = tensor("op_6448_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6448_end_0 = const()[name = tensor("op_6448_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_6448_end_mask_0 = const()[name = tensor("op_6448_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6448_cast_fp16 = slice_by_index(begin = var_6448_begin_0, end = var_6448_end_0, end_mask = var_6448_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6448_cast_fp16")]; tensor var_6452_begin_0 = const()[name = tensor("op_6452_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_6452_end_0 = const()[name = tensor("op_6452_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_6452_end_mask_0 = const()[name = tensor("op_6452_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_6452_cast_fp16 = slice_by_index(begin = var_6452_begin_0, end = var_6452_end_0, end_mask = var_6452_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_6452_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1009_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1009_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1009_equation_0, values = (var_6362_cast_fp16, var_6286_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1009_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1011_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1011_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1011_equation_0, values = (var_6362_cast_fp16, var_6287_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1011_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1013_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1013_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1013_equation_0, values = (var_6362_cast_fp16, var_6288_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1013_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1015_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1015_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1015_equation_0, values = (var_6362_cast_fp16, var_6289_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1015_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1017_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1017_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1017_equation_0, values = (var_6362_cast_fp16, var_6290_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1017_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1019_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1019_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1019_equation_0, values = (var_6362_cast_fp16, var_6291_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1019_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1021_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1021_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1021_equation_0, values = (var_6366_cast_fp16, var_6292_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1021_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1023_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1023_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1023_equation_0, values = (var_6366_cast_fp16, var_6293_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1023_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1025_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1025_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1025_equation_0, values = (var_6366_cast_fp16, var_6294_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1027_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1027_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1027_equation_0, values = (var_6366_cast_fp16, var_6295_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1027_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1029_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1029_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1029_equation_0, values = (var_6366_cast_fp16, var_6296_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1029_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1031_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1031_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1031_equation_0, values = (var_6366_cast_fp16, var_6297_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1031_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1033_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1033_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1033_equation_0, values = (var_6370_cast_fp16, var_6298_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1033_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1035_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1035_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1035_equation_0, values = (var_6370_cast_fp16, var_6299_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1035_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1037_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1037_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1037_equation_0, values = (var_6370_cast_fp16, var_6300_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1037_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1039_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1039_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1039_equation_0, values = (var_6370_cast_fp16, var_6301_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1039_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1041_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1041_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1041_equation_0, values = (var_6370_cast_fp16, var_6302_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1041_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1043_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1043_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1043_equation_0, values = (var_6370_cast_fp16, var_6303_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1043_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1045_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1045_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1045_equation_0, values = (var_6374_cast_fp16, var_6304_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1045_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1047_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1047_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1047_equation_0, values = (var_6374_cast_fp16, var_6305_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1047_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1049_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1049_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1049_equation_0, values = (var_6374_cast_fp16, var_6306_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1049_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1051_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1051_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1051_equation_0, values = (var_6374_cast_fp16, var_6307_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1051_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1053_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1053_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1053_equation_0, values = (var_6374_cast_fp16, var_6308_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1055_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1055_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1055_equation_0, values = (var_6374_cast_fp16, var_6309_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1055_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1057_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1057_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1057_equation_0, values = (var_6378_cast_fp16, var_6310_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1057_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1059_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1059_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1059_equation_0, values = (var_6378_cast_fp16, var_6311_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1059_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1061_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1061_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1061_equation_0, values = (var_6378_cast_fp16, var_6312_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1061_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1063_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1063_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1063_equation_0, values = (var_6378_cast_fp16, var_6313_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1063_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1065_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1065_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1065_equation_0, values = (var_6378_cast_fp16, var_6314_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1065_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1067_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1067_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1067_equation_0, values = (var_6378_cast_fp16, var_6315_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1067_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1069_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1069_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1069_equation_0, values = (var_6382_cast_fp16, var_6316_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1069_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1071_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1071_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1071_equation_0, values = (var_6382_cast_fp16, var_6317_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1071_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1073_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1073_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1073_equation_0, values = (var_6382_cast_fp16, var_6318_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1073_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1075_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1075_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1075_equation_0, values = (var_6382_cast_fp16, var_6319_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1075_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1077_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1077_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1077_equation_0, values = (var_6382_cast_fp16, var_6320_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1077_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1079_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1079_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1079_equation_0, values = (var_6382_cast_fp16, var_6321_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1079_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1081_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1081_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1081_equation_0, values = (var_6386_cast_fp16, var_6322_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1083_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1083_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1083_equation_0, values = (var_6386_cast_fp16, var_6323_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1083_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1085_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1085_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1085_equation_0, values = (var_6386_cast_fp16, var_6324_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1085_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1087_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1087_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1087_equation_0, values = (var_6386_cast_fp16, var_6325_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1087_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1089_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1089_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1089_equation_0, values = (var_6386_cast_fp16, var_6326_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1089_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1091_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1091_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1091_equation_0, values = (var_6386_cast_fp16, var_6327_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1091_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1093_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1093_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1093_equation_0, values = (var_6390_cast_fp16, var_6328_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1093_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1095_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1095_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1095_equation_0, values = (var_6390_cast_fp16, var_6329_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1095_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1097_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1097_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1097_equation_0, values = (var_6390_cast_fp16, var_6330_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1097_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1099_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1099_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1099_equation_0, values = (var_6390_cast_fp16, var_6331_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1099_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1101_equation_0, values = (var_6390_cast_fp16, var_6332_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1103_equation_0, values = (var_6390_cast_fp16, var_6333_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1105_equation_0, values = (var_6394_cast_fp16, var_6334_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1107_equation_0, values = (var_6394_cast_fp16, var_6335_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1109_equation_0, values = (var_6394_cast_fp16, var_6336_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1111_equation_0, values = (var_6394_cast_fp16, var_6337_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1113_equation_0, values = (var_6394_cast_fp16, var_6338_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1115_equation_0, values = (var_6394_cast_fp16, var_6339_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1117_equation_0, values = (var_6398_cast_fp16, var_6340_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1119_equation_0, values = (var_6398_cast_fp16, var_6341_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1121_equation_0, values = (var_6398_cast_fp16, var_6342_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1123_equation_0, values = (var_6398_cast_fp16, var_6343_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1125_equation_0, values = (var_6398_cast_fp16, var_6344_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1127_equation_0, values = (var_6398_cast_fp16, var_6345_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1129_equation_0, values = (var_6402_cast_fp16, var_6346_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1131_equation_0, values = (var_6402_cast_fp16, var_6347_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1133_equation_0, values = (var_6402_cast_fp16, var_6348_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1135_equation_0, values = (var_6402_cast_fp16, var_6349_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1137_equation_0, values = (var_6402_cast_fp16, var_6350_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1139_equation_0, values = (var_6402_cast_fp16, var_6351_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1141_equation_0, values = (var_6406_cast_fp16, var_6352_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1143_equation_0, values = (var_6406_cast_fp16, var_6353_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1145_equation_0, values = (var_6406_cast_fp16, var_6354_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1147_equation_0, values = (var_6406_cast_fp16, var_6355_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1149_equation_0, values = (var_6406_cast_fp16, var_6356_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1151_equation_0, values = (var_6406_cast_fp16, var_6357_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1151_cast_fp16")]; tensor var_6599_to_fp16 = const()[name = tensor("op_6599_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1009_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1009_cast_fp16, y = var_6599_to_fp16)[name = tensor("aw_chunk_1009_cast_fp16")]; tensor var_6601_to_fp16 = const()[name = tensor("op_6601_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1011_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1011_cast_fp16, y = var_6601_to_fp16)[name = tensor("aw_chunk_1011_cast_fp16")]; tensor var_6603_to_fp16 = const()[name = tensor("op_6603_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1013_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1013_cast_fp16, y = var_6603_to_fp16)[name = tensor("aw_chunk_1013_cast_fp16")]; tensor var_6605_to_fp16 = const()[name = tensor("op_6605_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1015_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1015_cast_fp16, y = var_6605_to_fp16)[name = tensor("aw_chunk_1015_cast_fp16")]; tensor var_6607_to_fp16 = const()[name = tensor("op_6607_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1017_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1017_cast_fp16, y = var_6607_to_fp16)[name = tensor("aw_chunk_1017_cast_fp16")]; tensor var_6609_to_fp16 = const()[name = tensor("op_6609_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1019_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1019_cast_fp16, y = var_6609_to_fp16)[name = tensor("aw_chunk_1019_cast_fp16")]; tensor var_6611_to_fp16 = const()[name = tensor("op_6611_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1021_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1021_cast_fp16, y = var_6611_to_fp16)[name = tensor("aw_chunk_1021_cast_fp16")]; tensor var_6613_to_fp16 = const()[name = tensor("op_6613_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1023_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1023_cast_fp16, y = var_6613_to_fp16)[name = tensor("aw_chunk_1023_cast_fp16")]; tensor var_6615_to_fp16 = const()[name = tensor("op_6615_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1025_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1025_cast_fp16, y = var_6615_to_fp16)[name = tensor("aw_chunk_1025_cast_fp16")]; tensor var_6617_to_fp16 = const()[name = tensor("op_6617_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1027_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1027_cast_fp16, y = var_6617_to_fp16)[name = tensor("aw_chunk_1027_cast_fp16")]; tensor var_6619_to_fp16 = const()[name = tensor("op_6619_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1029_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1029_cast_fp16, y = var_6619_to_fp16)[name = tensor("aw_chunk_1029_cast_fp16")]; tensor var_6621_to_fp16 = const()[name = tensor("op_6621_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1031_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1031_cast_fp16, y = var_6621_to_fp16)[name = tensor("aw_chunk_1031_cast_fp16")]; tensor var_6623_to_fp16 = const()[name = tensor("op_6623_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1033_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1033_cast_fp16, y = var_6623_to_fp16)[name = tensor("aw_chunk_1033_cast_fp16")]; tensor var_6625_to_fp16 = const()[name = tensor("op_6625_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1035_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1035_cast_fp16, y = var_6625_to_fp16)[name = tensor("aw_chunk_1035_cast_fp16")]; tensor var_6627_to_fp16 = const()[name = tensor("op_6627_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1037_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1037_cast_fp16, y = var_6627_to_fp16)[name = tensor("aw_chunk_1037_cast_fp16")]; tensor var_6629_to_fp16 = const()[name = tensor("op_6629_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1039_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1039_cast_fp16, y = var_6629_to_fp16)[name = tensor("aw_chunk_1039_cast_fp16")]; tensor var_6631_to_fp16 = const()[name = tensor("op_6631_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1041_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1041_cast_fp16, y = var_6631_to_fp16)[name = tensor("aw_chunk_1041_cast_fp16")]; tensor var_6633_to_fp16 = const()[name = tensor("op_6633_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1043_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1043_cast_fp16, y = var_6633_to_fp16)[name = tensor("aw_chunk_1043_cast_fp16")]; tensor var_6635_to_fp16 = const()[name = tensor("op_6635_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1045_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1045_cast_fp16, y = var_6635_to_fp16)[name = tensor("aw_chunk_1045_cast_fp16")]; tensor var_6637_to_fp16 = const()[name = tensor("op_6637_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1047_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1047_cast_fp16, y = var_6637_to_fp16)[name = tensor("aw_chunk_1047_cast_fp16")]; tensor var_6639_to_fp16 = const()[name = tensor("op_6639_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1049_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1049_cast_fp16, y = var_6639_to_fp16)[name = tensor("aw_chunk_1049_cast_fp16")]; tensor var_6641_to_fp16 = const()[name = tensor("op_6641_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1051_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1051_cast_fp16, y = var_6641_to_fp16)[name = tensor("aw_chunk_1051_cast_fp16")]; tensor var_6643_to_fp16 = const()[name = tensor("op_6643_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1053_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1053_cast_fp16, y = var_6643_to_fp16)[name = tensor("aw_chunk_1053_cast_fp16")]; tensor var_6645_to_fp16 = const()[name = tensor("op_6645_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1055_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1055_cast_fp16, y = var_6645_to_fp16)[name = tensor("aw_chunk_1055_cast_fp16")]; tensor var_6647_to_fp16 = const()[name = tensor("op_6647_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1057_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1057_cast_fp16, y = var_6647_to_fp16)[name = tensor("aw_chunk_1057_cast_fp16")]; tensor var_6649_to_fp16 = const()[name = tensor("op_6649_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1059_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1059_cast_fp16, y = var_6649_to_fp16)[name = tensor("aw_chunk_1059_cast_fp16")]; tensor var_6651_to_fp16 = const()[name = tensor("op_6651_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1061_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1061_cast_fp16, y = var_6651_to_fp16)[name = tensor("aw_chunk_1061_cast_fp16")]; tensor var_6653_to_fp16 = const()[name = tensor("op_6653_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1063_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1063_cast_fp16, y = var_6653_to_fp16)[name = tensor("aw_chunk_1063_cast_fp16")]; tensor var_6655_to_fp16 = const()[name = tensor("op_6655_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1065_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1065_cast_fp16, y = var_6655_to_fp16)[name = tensor("aw_chunk_1065_cast_fp16")]; tensor var_6657_to_fp16 = const()[name = tensor("op_6657_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1067_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1067_cast_fp16, y = var_6657_to_fp16)[name = tensor("aw_chunk_1067_cast_fp16")]; tensor var_6659_to_fp16 = const()[name = tensor("op_6659_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1069_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1069_cast_fp16, y = var_6659_to_fp16)[name = tensor("aw_chunk_1069_cast_fp16")]; tensor var_6661_to_fp16 = const()[name = tensor("op_6661_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1071_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1071_cast_fp16, y = var_6661_to_fp16)[name = tensor("aw_chunk_1071_cast_fp16")]; tensor var_6663_to_fp16 = const()[name = tensor("op_6663_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1073_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1073_cast_fp16, y = var_6663_to_fp16)[name = tensor("aw_chunk_1073_cast_fp16")]; tensor var_6665_to_fp16 = const()[name = tensor("op_6665_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1075_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1075_cast_fp16, y = var_6665_to_fp16)[name = tensor("aw_chunk_1075_cast_fp16")]; tensor var_6667_to_fp16 = const()[name = tensor("op_6667_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1077_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1077_cast_fp16, y = var_6667_to_fp16)[name = tensor("aw_chunk_1077_cast_fp16")]; tensor var_6669_to_fp16 = const()[name = tensor("op_6669_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1079_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1079_cast_fp16, y = var_6669_to_fp16)[name = tensor("aw_chunk_1079_cast_fp16")]; tensor var_6671_to_fp16 = const()[name = tensor("op_6671_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1081_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1081_cast_fp16, y = var_6671_to_fp16)[name = tensor("aw_chunk_1081_cast_fp16")]; tensor var_6673_to_fp16 = const()[name = tensor("op_6673_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1083_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1083_cast_fp16, y = var_6673_to_fp16)[name = tensor("aw_chunk_1083_cast_fp16")]; tensor var_6675_to_fp16 = const()[name = tensor("op_6675_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1085_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1085_cast_fp16, y = var_6675_to_fp16)[name = tensor("aw_chunk_1085_cast_fp16")]; tensor var_6677_to_fp16 = const()[name = tensor("op_6677_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1087_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1087_cast_fp16, y = var_6677_to_fp16)[name = tensor("aw_chunk_1087_cast_fp16")]; tensor var_6679_to_fp16 = const()[name = tensor("op_6679_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1089_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1089_cast_fp16, y = var_6679_to_fp16)[name = tensor("aw_chunk_1089_cast_fp16")]; tensor var_6681_to_fp16 = const()[name = tensor("op_6681_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1091_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1091_cast_fp16, y = var_6681_to_fp16)[name = tensor("aw_chunk_1091_cast_fp16")]; tensor var_6683_to_fp16 = const()[name = tensor("op_6683_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1093_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1093_cast_fp16, y = var_6683_to_fp16)[name = tensor("aw_chunk_1093_cast_fp16")]; tensor var_6685_to_fp16 = const()[name = tensor("op_6685_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1095_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1095_cast_fp16, y = var_6685_to_fp16)[name = tensor("aw_chunk_1095_cast_fp16")]; tensor var_6687_to_fp16 = const()[name = tensor("op_6687_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1097_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1097_cast_fp16, y = var_6687_to_fp16)[name = tensor("aw_chunk_1097_cast_fp16")]; tensor var_6689_to_fp16 = const()[name = tensor("op_6689_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1099_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1099_cast_fp16, y = var_6689_to_fp16)[name = tensor("aw_chunk_1099_cast_fp16")]; tensor var_6691_to_fp16 = const()[name = tensor("op_6691_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1101_cast_fp16, y = var_6691_to_fp16)[name = tensor("aw_chunk_1101_cast_fp16")]; tensor var_6693_to_fp16 = const()[name = tensor("op_6693_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1103_cast_fp16, y = var_6693_to_fp16)[name = tensor("aw_chunk_1103_cast_fp16")]; tensor var_6695_to_fp16 = const()[name = tensor("op_6695_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1105_cast_fp16, y = var_6695_to_fp16)[name = tensor("aw_chunk_1105_cast_fp16")]; tensor var_6697_to_fp16 = const()[name = tensor("op_6697_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1107_cast_fp16, y = var_6697_to_fp16)[name = tensor("aw_chunk_1107_cast_fp16")]; tensor var_6699_to_fp16 = const()[name = tensor("op_6699_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1109_cast_fp16, y = var_6699_to_fp16)[name = tensor("aw_chunk_1109_cast_fp16")]; tensor var_6701_to_fp16 = const()[name = tensor("op_6701_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1111_cast_fp16, y = var_6701_to_fp16)[name = tensor("aw_chunk_1111_cast_fp16")]; tensor var_6703_to_fp16 = const()[name = tensor("op_6703_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1113_cast_fp16, y = var_6703_to_fp16)[name = tensor("aw_chunk_1113_cast_fp16")]; tensor var_6705_to_fp16 = const()[name = tensor("op_6705_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1115_cast_fp16, y = var_6705_to_fp16)[name = tensor("aw_chunk_1115_cast_fp16")]; tensor var_6707_to_fp16 = const()[name = tensor("op_6707_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1117_cast_fp16, y = var_6707_to_fp16)[name = tensor("aw_chunk_1117_cast_fp16")]; tensor var_6709_to_fp16 = const()[name = tensor("op_6709_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1119_cast_fp16, y = var_6709_to_fp16)[name = tensor("aw_chunk_1119_cast_fp16")]; tensor var_6711_to_fp16 = const()[name = tensor("op_6711_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1121_cast_fp16, y = var_6711_to_fp16)[name = tensor("aw_chunk_1121_cast_fp16")]; tensor var_6713_to_fp16 = const()[name = tensor("op_6713_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1123_cast_fp16, y = var_6713_to_fp16)[name = tensor("aw_chunk_1123_cast_fp16")]; tensor var_6715_to_fp16 = const()[name = tensor("op_6715_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1125_cast_fp16, y = var_6715_to_fp16)[name = tensor("aw_chunk_1125_cast_fp16")]; tensor var_6717_to_fp16 = const()[name = tensor("op_6717_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1127_cast_fp16, y = var_6717_to_fp16)[name = tensor("aw_chunk_1127_cast_fp16")]; tensor var_6719_to_fp16 = const()[name = tensor("op_6719_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1129_cast_fp16, y = var_6719_to_fp16)[name = tensor("aw_chunk_1129_cast_fp16")]; tensor var_6721_to_fp16 = const()[name = tensor("op_6721_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1131_cast_fp16, y = var_6721_to_fp16)[name = tensor("aw_chunk_1131_cast_fp16")]; tensor var_6723_to_fp16 = const()[name = tensor("op_6723_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1133_cast_fp16, y = var_6723_to_fp16)[name = tensor("aw_chunk_1133_cast_fp16")]; tensor var_6725_to_fp16 = const()[name = tensor("op_6725_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1135_cast_fp16, y = var_6725_to_fp16)[name = tensor("aw_chunk_1135_cast_fp16")]; tensor var_6727_to_fp16 = const()[name = tensor("op_6727_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1137_cast_fp16, y = var_6727_to_fp16)[name = tensor("aw_chunk_1137_cast_fp16")]; tensor var_6729_to_fp16 = const()[name = tensor("op_6729_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1139_cast_fp16, y = var_6729_to_fp16)[name = tensor("aw_chunk_1139_cast_fp16")]; tensor var_6731_to_fp16 = const()[name = tensor("op_6731_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1141_cast_fp16, y = var_6731_to_fp16)[name = tensor("aw_chunk_1141_cast_fp16")]; tensor var_6733_to_fp16 = const()[name = tensor("op_6733_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1143_cast_fp16, y = var_6733_to_fp16)[name = tensor("aw_chunk_1143_cast_fp16")]; tensor var_6735_to_fp16 = const()[name = tensor("op_6735_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1145_cast_fp16, y = var_6735_to_fp16)[name = tensor("aw_chunk_1145_cast_fp16")]; tensor var_6737_to_fp16 = const()[name = tensor("op_6737_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1147_cast_fp16, y = var_6737_to_fp16)[name = tensor("aw_chunk_1147_cast_fp16")]; tensor var_6739_to_fp16 = const()[name = tensor("op_6739_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1149_cast_fp16, y = var_6739_to_fp16)[name = tensor("aw_chunk_1149_cast_fp16")]; tensor var_6741_to_fp16 = const()[name = tensor("op_6741_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1151_cast_fp16, y = var_6741_to_fp16)[name = tensor("aw_chunk_1151_cast_fp16")]; tensor var_6743_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1009_cast_fp16)[name = tensor("op_6743_cast_fp16")]; tensor var_6744_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1011_cast_fp16)[name = tensor("op_6744_cast_fp16")]; tensor var_6745_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1013_cast_fp16)[name = tensor("op_6745_cast_fp16")]; tensor var_6746_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1015_cast_fp16)[name = tensor("op_6746_cast_fp16")]; tensor var_6747_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1017_cast_fp16)[name = tensor("op_6747_cast_fp16")]; tensor var_6748_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1019_cast_fp16)[name = tensor("op_6748_cast_fp16")]; tensor var_6749_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1021_cast_fp16)[name = tensor("op_6749_cast_fp16")]; tensor var_6750_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1023_cast_fp16)[name = tensor("op_6750_cast_fp16")]; tensor var_6751_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1025_cast_fp16)[name = tensor("op_6751_cast_fp16")]; tensor var_6752_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1027_cast_fp16)[name = tensor("op_6752_cast_fp16")]; tensor var_6753_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1029_cast_fp16)[name = tensor("op_6753_cast_fp16")]; tensor var_6754_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1031_cast_fp16)[name = tensor("op_6754_cast_fp16")]; tensor var_6755_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1033_cast_fp16)[name = tensor("op_6755_cast_fp16")]; tensor var_6756_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1035_cast_fp16)[name = tensor("op_6756_cast_fp16")]; tensor var_6757_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1037_cast_fp16)[name = tensor("op_6757_cast_fp16")]; tensor var_6758_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1039_cast_fp16)[name = tensor("op_6758_cast_fp16")]; tensor var_6759_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1041_cast_fp16)[name = tensor("op_6759_cast_fp16")]; tensor var_6760_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1043_cast_fp16)[name = tensor("op_6760_cast_fp16")]; tensor var_6761_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1045_cast_fp16)[name = tensor("op_6761_cast_fp16")]; tensor var_6762_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1047_cast_fp16)[name = tensor("op_6762_cast_fp16")]; tensor var_6763_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1049_cast_fp16)[name = tensor("op_6763_cast_fp16")]; tensor var_6764_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1051_cast_fp16)[name = tensor("op_6764_cast_fp16")]; tensor var_6765_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1053_cast_fp16)[name = tensor("op_6765_cast_fp16")]; tensor var_6766_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1055_cast_fp16)[name = tensor("op_6766_cast_fp16")]; tensor var_6767_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1057_cast_fp16)[name = tensor("op_6767_cast_fp16")]; tensor var_6768_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1059_cast_fp16)[name = tensor("op_6768_cast_fp16")]; tensor var_6769_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1061_cast_fp16)[name = tensor("op_6769_cast_fp16")]; tensor var_6770_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1063_cast_fp16)[name = tensor("op_6770_cast_fp16")]; tensor var_6771_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1065_cast_fp16)[name = tensor("op_6771_cast_fp16")]; tensor var_6772_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1067_cast_fp16)[name = tensor("op_6772_cast_fp16")]; tensor var_6773_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1069_cast_fp16)[name = tensor("op_6773_cast_fp16")]; tensor var_6774_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1071_cast_fp16)[name = tensor("op_6774_cast_fp16")]; tensor var_6775_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1073_cast_fp16)[name = tensor("op_6775_cast_fp16")]; tensor var_6776_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1075_cast_fp16)[name = tensor("op_6776_cast_fp16")]; tensor var_6777_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1077_cast_fp16)[name = tensor("op_6777_cast_fp16")]; tensor var_6778_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1079_cast_fp16)[name = tensor("op_6778_cast_fp16")]; tensor var_6779_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1081_cast_fp16)[name = tensor("op_6779_cast_fp16")]; tensor var_6780_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1083_cast_fp16)[name = tensor("op_6780_cast_fp16")]; tensor var_6781_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1085_cast_fp16)[name = tensor("op_6781_cast_fp16")]; tensor var_6782_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1087_cast_fp16)[name = tensor("op_6782_cast_fp16")]; tensor var_6783_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1089_cast_fp16)[name = tensor("op_6783_cast_fp16")]; tensor var_6784_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1091_cast_fp16)[name = tensor("op_6784_cast_fp16")]; tensor var_6785_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1093_cast_fp16)[name = tensor("op_6785_cast_fp16")]; tensor var_6786_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1095_cast_fp16)[name = tensor("op_6786_cast_fp16")]; tensor var_6787_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1097_cast_fp16)[name = tensor("op_6787_cast_fp16")]; tensor var_6788_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1099_cast_fp16)[name = tensor("op_6788_cast_fp16")]; tensor var_6789_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1101_cast_fp16)[name = tensor("op_6789_cast_fp16")]; tensor var_6790_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1103_cast_fp16)[name = tensor("op_6790_cast_fp16")]; tensor var_6791_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1105_cast_fp16)[name = tensor("op_6791_cast_fp16")]; tensor var_6792_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1107_cast_fp16)[name = tensor("op_6792_cast_fp16")]; tensor var_6793_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1109_cast_fp16)[name = tensor("op_6793_cast_fp16")]; tensor var_6794_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1111_cast_fp16)[name = tensor("op_6794_cast_fp16")]; tensor var_6795_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1113_cast_fp16)[name = tensor("op_6795_cast_fp16")]; tensor var_6796_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1115_cast_fp16)[name = tensor("op_6796_cast_fp16")]; tensor var_6797_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1117_cast_fp16)[name = tensor("op_6797_cast_fp16")]; tensor var_6798_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1119_cast_fp16)[name = tensor("op_6798_cast_fp16")]; tensor var_6799_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1121_cast_fp16)[name = tensor("op_6799_cast_fp16")]; tensor var_6800_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1123_cast_fp16)[name = tensor("op_6800_cast_fp16")]; tensor var_6801_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1125_cast_fp16)[name = tensor("op_6801_cast_fp16")]; tensor var_6802_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1127_cast_fp16)[name = tensor("op_6802_cast_fp16")]; tensor var_6803_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1129_cast_fp16)[name = tensor("op_6803_cast_fp16")]; tensor var_6804_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1131_cast_fp16)[name = tensor("op_6804_cast_fp16")]; tensor var_6805_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1133_cast_fp16)[name = tensor("op_6805_cast_fp16")]; tensor var_6806_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1135_cast_fp16)[name = tensor("op_6806_cast_fp16")]; tensor var_6807_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1137_cast_fp16)[name = tensor("op_6807_cast_fp16")]; tensor var_6808_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1139_cast_fp16)[name = tensor("op_6808_cast_fp16")]; tensor var_6809_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1141_cast_fp16)[name = tensor("op_6809_cast_fp16")]; tensor var_6810_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1143_cast_fp16)[name = tensor("op_6810_cast_fp16")]; tensor var_6811_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1145_cast_fp16)[name = tensor("op_6811_cast_fp16")]; tensor var_6812_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1147_cast_fp16)[name = tensor("op_6812_cast_fp16")]; tensor var_6813_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1149_cast_fp16)[name = tensor("op_6813_cast_fp16")]; tensor var_6814_cast_fp16 = softmax(axis = var_6187, x = aw_chunk_1151_cast_fp16)[name = tensor("op_6814_cast_fp16")]; tensor var_6816_equation_0 = const()[name = tensor("op_6816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6816_cast_fp16 = einsum(equation = var_6816_equation_0, values = (var_6408_cast_fp16, var_6743_cast_fp16))[name = tensor("op_6816_cast_fp16")]; tensor var_6818_equation_0 = const()[name = tensor("op_6818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6818_cast_fp16 = einsum(equation = var_6818_equation_0, values = (var_6408_cast_fp16, var_6744_cast_fp16))[name = tensor("op_6818_cast_fp16")]; tensor var_6820_equation_0 = const()[name = tensor("op_6820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6820_cast_fp16 = einsum(equation = var_6820_equation_0, values = (var_6408_cast_fp16, var_6745_cast_fp16))[name = tensor("op_6820_cast_fp16")]; tensor var_6822_equation_0 = const()[name = tensor("op_6822_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6822_cast_fp16 = einsum(equation = var_6822_equation_0, values = (var_6408_cast_fp16, var_6746_cast_fp16))[name = tensor("op_6822_cast_fp16")]; tensor var_6824_equation_0 = const()[name = tensor("op_6824_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6824_cast_fp16 = einsum(equation = var_6824_equation_0, values = (var_6408_cast_fp16, var_6747_cast_fp16))[name = tensor("op_6824_cast_fp16")]; tensor var_6826_equation_0 = const()[name = tensor("op_6826_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6826_cast_fp16 = einsum(equation = var_6826_equation_0, values = (var_6408_cast_fp16, var_6748_cast_fp16))[name = tensor("op_6826_cast_fp16")]; tensor var_6828_equation_0 = const()[name = tensor("op_6828_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6828_cast_fp16 = einsum(equation = var_6828_equation_0, values = (var_6412_cast_fp16, var_6749_cast_fp16))[name = tensor("op_6828_cast_fp16")]; tensor var_6830_equation_0 = const()[name = tensor("op_6830_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6830_cast_fp16 = einsum(equation = var_6830_equation_0, values = (var_6412_cast_fp16, var_6750_cast_fp16))[name = tensor("op_6830_cast_fp16")]; tensor var_6832_equation_0 = const()[name = tensor("op_6832_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6832_cast_fp16 = einsum(equation = var_6832_equation_0, values = (var_6412_cast_fp16, var_6751_cast_fp16))[name = tensor("op_6832_cast_fp16")]; tensor var_6834_equation_0 = const()[name = tensor("op_6834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6834_cast_fp16 = einsum(equation = var_6834_equation_0, values = (var_6412_cast_fp16, var_6752_cast_fp16))[name = tensor("op_6834_cast_fp16")]; tensor var_6836_equation_0 = const()[name = tensor("op_6836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6836_cast_fp16 = einsum(equation = var_6836_equation_0, values = (var_6412_cast_fp16, var_6753_cast_fp16))[name = tensor("op_6836_cast_fp16")]; tensor var_6838_equation_0 = const()[name = tensor("op_6838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6838_cast_fp16 = einsum(equation = var_6838_equation_0, values = (var_6412_cast_fp16, var_6754_cast_fp16))[name = tensor("op_6838_cast_fp16")]; tensor var_6840_equation_0 = const()[name = tensor("op_6840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6840_cast_fp16 = einsum(equation = var_6840_equation_0, values = (var_6416_cast_fp16, var_6755_cast_fp16))[name = tensor("op_6840_cast_fp16")]; tensor var_6842_equation_0 = const()[name = tensor("op_6842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6842_cast_fp16 = einsum(equation = var_6842_equation_0, values = (var_6416_cast_fp16, var_6756_cast_fp16))[name = tensor("op_6842_cast_fp16")]; tensor var_6844_equation_0 = const()[name = tensor("op_6844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6844_cast_fp16 = einsum(equation = var_6844_equation_0, values = (var_6416_cast_fp16, var_6757_cast_fp16))[name = tensor("op_6844_cast_fp16")]; tensor var_6846_equation_0 = const()[name = tensor("op_6846_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6846_cast_fp16 = einsum(equation = var_6846_equation_0, values = (var_6416_cast_fp16, var_6758_cast_fp16))[name = tensor("op_6846_cast_fp16")]; tensor var_6848_equation_0 = const()[name = tensor("op_6848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6848_cast_fp16 = einsum(equation = var_6848_equation_0, values = (var_6416_cast_fp16, var_6759_cast_fp16))[name = tensor("op_6848_cast_fp16")]; tensor var_6850_equation_0 = const()[name = tensor("op_6850_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6850_cast_fp16 = einsum(equation = var_6850_equation_0, values = (var_6416_cast_fp16, var_6760_cast_fp16))[name = tensor("op_6850_cast_fp16")]; tensor var_6852_equation_0 = const()[name = tensor("op_6852_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6852_cast_fp16 = einsum(equation = var_6852_equation_0, values = (var_6420_cast_fp16, var_6761_cast_fp16))[name = tensor("op_6852_cast_fp16")]; tensor var_6854_equation_0 = const()[name = tensor("op_6854_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6854_cast_fp16 = einsum(equation = var_6854_equation_0, values = (var_6420_cast_fp16, var_6762_cast_fp16))[name = tensor("op_6854_cast_fp16")]; tensor var_6856_equation_0 = const()[name = tensor("op_6856_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6856_cast_fp16 = einsum(equation = var_6856_equation_0, values = (var_6420_cast_fp16, var_6763_cast_fp16))[name = tensor("op_6856_cast_fp16")]; tensor var_6858_equation_0 = const()[name = tensor("op_6858_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6858_cast_fp16 = einsum(equation = var_6858_equation_0, values = (var_6420_cast_fp16, var_6764_cast_fp16))[name = tensor("op_6858_cast_fp16")]; tensor var_6860_equation_0 = const()[name = tensor("op_6860_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6860_cast_fp16 = einsum(equation = var_6860_equation_0, values = (var_6420_cast_fp16, var_6765_cast_fp16))[name = tensor("op_6860_cast_fp16")]; tensor var_6862_equation_0 = const()[name = tensor("op_6862_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6862_cast_fp16 = einsum(equation = var_6862_equation_0, values = (var_6420_cast_fp16, var_6766_cast_fp16))[name = tensor("op_6862_cast_fp16")]; tensor var_6864_equation_0 = const()[name = tensor("op_6864_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6864_cast_fp16 = einsum(equation = var_6864_equation_0, values = (var_6424_cast_fp16, var_6767_cast_fp16))[name = tensor("op_6864_cast_fp16")]; tensor var_6866_equation_0 = const()[name = tensor("op_6866_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6866_cast_fp16 = einsum(equation = var_6866_equation_0, values = (var_6424_cast_fp16, var_6768_cast_fp16))[name = tensor("op_6866_cast_fp16")]; tensor var_6868_equation_0 = const()[name = tensor("op_6868_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6868_cast_fp16 = einsum(equation = var_6868_equation_0, values = (var_6424_cast_fp16, var_6769_cast_fp16))[name = tensor("op_6868_cast_fp16")]; tensor var_6870_equation_0 = const()[name = tensor("op_6870_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6870_cast_fp16 = einsum(equation = var_6870_equation_0, values = (var_6424_cast_fp16, var_6770_cast_fp16))[name = tensor("op_6870_cast_fp16")]; tensor var_6872_equation_0 = const()[name = tensor("op_6872_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6872_cast_fp16 = einsum(equation = var_6872_equation_0, values = (var_6424_cast_fp16, var_6771_cast_fp16))[name = tensor("op_6872_cast_fp16")]; tensor var_6874_equation_0 = const()[name = tensor("op_6874_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6874_cast_fp16 = einsum(equation = var_6874_equation_0, values = (var_6424_cast_fp16, var_6772_cast_fp16))[name = tensor("op_6874_cast_fp16")]; tensor var_6876_equation_0 = const()[name = tensor("op_6876_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6876_cast_fp16 = einsum(equation = var_6876_equation_0, values = (var_6428_cast_fp16, var_6773_cast_fp16))[name = tensor("op_6876_cast_fp16")]; tensor var_6878_equation_0 = const()[name = tensor("op_6878_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6878_cast_fp16 = einsum(equation = var_6878_equation_0, values = (var_6428_cast_fp16, var_6774_cast_fp16))[name = tensor("op_6878_cast_fp16")]; tensor var_6880_equation_0 = const()[name = tensor("op_6880_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6880_cast_fp16 = einsum(equation = var_6880_equation_0, values = (var_6428_cast_fp16, var_6775_cast_fp16))[name = tensor("op_6880_cast_fp16")]; tensor var_6882_equation_0 = const()[name = tensor("op_6882_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6882_cast_fp16 = einsum(equation = var_6882_equation_0, values = (var_6428_cast_fp16, var_6776_cast_fp16))[name = tensor("op_6882_cast_fp16")]; tensor var_6884_equation_0 = const()[name = tensor("op_6884_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6884_cast_fp16 = einsum(equation = var_6884_equation_0, values = (var_6428_cast_fp16, var_6777_cast_fp16))[name = tensor("op_6884_cast_fp16")]; tensor var_6886_equation_0 = const()[name = tensor("op_6886_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6886_cast_fp16 = einsum(equation = var_6886_equation_0, values = (var_6428_cast_fp16, var_6778_cast_fp16))[name = tensor("op_6886_cast_fp16")]; tensor var_6888_equation_0 = const()[name = tensor("op_6888_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6888_cast_fp16 = einsum(equation = var_6888_equation_0, values = (var_6432_cast_fp16, var_6779_cast_fp16))[name = tensor("op_6888_cast_fp16")]; tensor var_6890_equation_0 = const()[name = tensor("op_6890_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6890_cast_fp16 = einsum(equation = var_6890_equation_0, values = (var_6432_cast_fp16, var_6780_cast_fp16))[name = tensor("op_6890_cast_fp16")]; tensor var_6892_equation_0 = const()[name = tensor("op_6892_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6892_cast_fp16 = einsum(equation = var_6892_equation_0, values = (var_6432_cast_fp16, var_6781_cast_fp16))[name = tensor("op_6892_cast_fp16")]; tensor var_6894_equation_0 = const()[name = tensor("op_6894_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6894_cast_fp16 = einsum(equation = var_6894_equation_0, values = (var_6432_cast_fp16, var_6782_cast_fp16))[name = tensor("op_6894_cast_fp16")]; tensor var_6896_equation_0 = const()[name = tensor("op_6896_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6896_cast_fp16 = einsum(equation = var_6896_equation_0, values = (var_6432_cast_fp16, var_6783_cast_fp16))[name = tensor("op_6896_cast_fp16")]; tensor var_6898_equation_0 = const()[name = tensor("op_6898_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6898_cast_fp16 = einsum(equation = var_6898_equation_0, values = (var_6432_cast_fp16, var_6784_cast_fp16))[name = tensor("op_6898_cast_fp16")]; tensor var_6900_equation_0 = const()[name = tensor("op_6900_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6900_cast_fp16 = einsum(equation = var_6900_equation_0, values = (var_6436_cast_fp16, var_6785_cast_fp16))[name = tensor("op_6900_cast_fp16")]; tensor var_6902_equation_0 = const()[name = tensor("op_6902_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6902_cast_fp16 = einsum(equation = var_6902_equation_0, values = (var_6436_cast_fp16, var_6786_cast_fp16))[name = tensor("op_6902_cast_fp16")]; tensor var_6904_equation_0 = const()[name = tensor("op_6904_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6904_cast_fp16 = einsum(equation = var_6904_equation_0, values = (var_6436_cast_fp16, var_6787_cast_fp16))[name = tensor("op_6904_cast_fp16")]; tensor var_6906_equation_0 = const()[name = tensor("op_6906_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6906_cast_fp16 = einsum(equation = var_6906_equation_0, values = (var_6436_cast_fp16, var_6788_cast_fp16))[name = tensor("op_6906_cast_fp16")]; tensor var_6908_equation_0 = const()[name = tensor("op_6908_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6908_cast_fp16 = einsum(equation = var_6908_equation_0, values = (var_6436_cast_fp16, var_6789_cast_fp16))[name = tensor("op_6908_cast_fp16")]; tensor var_6910_equation_0 = const()[name = tensor("op_6910_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6910_cast_fp16 = einsum(equation = var_6910_equation_0, values = (var_6436_cast_fp16, var_6790_cast_fp16))[name = tensor("op_6910_cast_fp16")]; tensor var_6912_equation_0 = const()[name = tensor("op_6912_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6912_cast_fp16 = einsum(equation = var_6912_equation_0, values = (var_6440_cast_fp16, var_6791_cast_fp16))[name = tensor("op_6912_cast_fp16")]; tensor var_6914_equation_0 = const()[name = tensor("op_6914_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6914_cast_fp16 = einsum(equation = var_6914_equation_0, values = (var_6440_cast_fp16, var_6792_cast_fp16))[name = tensor("op_6914_cast_fp16")]; tensor var_6916_equation_0 = const()[name = tensor("op_6916_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6916_cast_fp16 = einsum(equation = var_6916_equation_0, values = (var_6440_cast_fp16, var_6793_cast_fp16))[name = tensor("op_6916_cast_fp16")]; tensor var_6918_equation_0 = const()[name = tensor("op_6918_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6918_cast_fp16 = einsum(equation = var_6918_equation_0, values = (var_6440_cast_fp16, var_6794_cast_fp16))[name = tensor("op_6918_cast_fp16")]; tensor var_6920_equation_0 = const()[name = tensor("op_6920_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6920_cast_fp16 = einsum(equation = var_6920_equation_0, values = (var_6440_cast_fp16, var_6795_cast_fp16))[name = tensor("op_6920_cast_fp16")]; tensor var_6922_equation_0 = const()[name = tensor("op_6922_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6922_cast_fp16 = einsum(equation = var_6922_equation_0, values = (var_6440_cast_fp16, var_6796_cast_fp16))[name = tensor("op_6922_cast_fp16")]; tensor var_6924_equation_0 = const()[name = tensor("op_6924_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6924_cast_fp16 = einsum(equation = var_6924_equation_0, values = (var_6444_cast_fp16, var_6797_cast_fp16))[name = tensor("op_6924_cast_fp16")]; tensor var_6926_equation_0 = const()[name = tensor("op_6926_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6926_cast_fp16 = einsum(equation = var_6926_equation_0, values = (var_6444_cast_fp16, var_6798_cast_fp16))[name = tensor("op_6926_cast_fp16")]; tensor var_6928_equation_0 = const()[name = tensor("op_6928_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6928_cast_fp16 = einsum(equation = var_6928_equation_0, values = (var_6444_cast_fp16, var_6799_cast_fp16))[name = tensor("op_6928_cast_fp16")]; tensor var_6930_equation_0 = const()[name = tensor("op_6930_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6930_cast_fp16 = einsum(equation = var_6930_equation_0, values = (var_6444_cast_fp16, var_6800_cast_fp16))[name = tensor("op_6930_cast_fp16")]; tensor var_6932_equation_0 = const()[name = tensor("op_6932_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6932_cast_fp16 = einsum(equation = var_6932_equation_0, values = (var_6444_cast_fp16, var_6801_cast_fp16))[name = tensor("op_6932_cast_fp16")]; tensor var_6934_equation_0 = const()[name = tensor("op_6934_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6934_cast_fp16 = einsum(equation = var_6934_equation_0, values = (var_6444_cast_fp16, var_6802_cast_fp16))[name = tensor("op_6934_cast_fp16")]; tensor var_6936_equation_0 = const()[name = tensor("op_6936_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6936_cast_fp16 = einsum(equation = var_6936_equation_0, values = (var_6448_cast_fp16, var_6803_cast_fp16))[name = tensor("op_6936_cast_fp16")]; tensor var_6938_equation_0 = const()[name = tensor("op_6938_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6938_cast_fp16 = einsum(equation = var_6938_equation_0, values = (var_6448_cast_fp16, var_6804_cast_fp16))[name = tensor("op_6938_cast_fp16")]; tensor var_6940_equation_0 = const()[name = tensor("op_6940_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6940_cast_fp16 = einsum(equation = var_6940_equation_0, values = (var_6448_cast_fp16, var_6805_cast_fp16))[name = tensor("op_6940_cast_fp16")]; tensor var_6942_equation_0 = const()[name = tensor("op_6942_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6942_cast_fp16 = einsum(equation = var_6942_equation_0, values = (var_6448_cast_fp16, var_6806_cast_fp16))[name = tensor("op_6942_cast_fp16")]; tensor var_6944_equation_0 = const()[name = tensor("op_6944_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6944_cast_fp16 = einsum(equation = var_6944_equation_0, values = (var_6448_cast_fp16, var_6807_cast_fp16))[name = tensor("op_6944_cast_fp16")]; tensor var_6946_equation_0 = const()[name = tensor("op_6946_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6946_cast_fp16 = einsum(equation = var_6946_equation_0, values = (var_6448_cast_fp16, var_6808_cast_fp16))[name = tensor("op_6946_cast_fp16")]; tensor var_6948_equation_0 = const()[name = tensor("op_6948_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6948_cast_fp16 = einsum(equation = var_6948_equation_0, values = (var_6452_cast_fp16, var_6809_cast_fp16))[name = tensor("op_6948_cast_fp16")]; tensor var_6950_equation_0 = const()[name = tensor("op_6950_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6950_cast_fp16 = einsum(equation = var_6950_equation_0, values = (var_6452_cast_fp16, var_6810_cast_fp16))[name = tensor("op_6950_cast_fp16")]; tensor var_6952_equation_0 = const()[name = tensor("op_6952_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6952_cast_fp16 = einsum(equation = var_6952_equation_0, values = (var_6452_cast_fp16, var_6811_cast_fp16))[name = tensor("op_6952_cast_fp16")]; tensor var_6954_equation_0 = const()[name = tensor("op_6954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6954_cast_fp16 = einsum(equation = var_6954_equation_0, values = (var_6452_cast_fp16, var_6812_cast_fp16))[name = tensor("op_6954_cast_fp16")]; tensor var_6956_equation_0 = const()[name = tensor("op_6956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6956_cast_fp16 = einsum(equation = var_6956_equation_0, values = (var_6452_cast_fp16, var_6813_cast_fp16))[name = tensor("op_6956_cast_fp16")]; tensor var_6958_equation_0 = const()[name = tensor("op_6958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6958_cast_fp16 = einsum(equation = var_6958_equation_0, values = (var_6452_cast_fp16, var_6814_cast_fp16))[name = tensor("op_6958_cast_fp16")]; tensor var_6960_interleave_0 = const()[name = tensor("op_6960_interleave_0"), val = tensor(false)]; tensor var_6960_cast_fp16 = concat(axis = var_6171, interleave = var_6960_interleave_0, values = (var_6816_cast_fp16, var_6818_cast_fp16, var_6820_cast_fp16, var_6822_cast_fp16, var_6824_cast_fp16, var_6826_cast_fp16))[name = tensor("op_6960_cast_fp16")]; tensor var_6962_interleave_0 = const()[name = tensor("op_6962_interleave_0"), val = tensor(false)]; tensor var_6962_cast_fp16 = concat(axis = var_6171, interleave = var_6962_interleave_0, values = (var_6828_cast_fp16, var_6830_cast_fp16, var_6832_cast_fp16, var_6834_cast_fp16, var_6836_cast_fp16, var_6838_cast_fp16))[name = tensor("op_6962_cast_fp16")]; tensor var_6964_interleave_0 = const()[name = tensor("op_6964_interleave_0"), val = tensor(false)]; tensor var_6964_cast_fp16 = concat(axis = var_6171, interleave = var_6964_interleave_0, values = (var_6840_cast_fp16, var_6842_cast_fp16, var_6844_cast_fp16, var_6846_cast_fp16, var_6848_cast_fp16, var_6850_cast_fp16))[name = tensor("op_6964_cast_fp16")]; tensor var_6966_interleave_0 = const()[name = tensor("op_6966_interleave_0"), val = tensor(false)]; tensor var_6966_cast_fp16 = concat(axis = var_6171, interleave = var_6966_interleave_0, values = (var_6852_cast_fp16, var_6854_cast_fp16, var_6856_cast_fp16, var_6858_cast_fp16, var_6860_cast_fp16, var_6862_cast_fp16))[name = tensor("op_6966_cast_fp16")]; tensor var_6968_interleave_0 = const()[name = tensor("op_6968_interleave_0"), val = tensor(false)]; tensor var_6968_cast_fp16 = concat(axis = var_6171, interleave = var_6968_interleave_0, values = (var_6864_cast_fp16, var_6866_cast_fp16, var_6868_cast_fp16, var_6870_cast_fp16, var_6872_cast_fp16, var_6874_cast_fp16))[name = tensor("op_6968_cast_fp16")]; tensor var_6970_interleave_0 = const()[name = tensor("op_6970_interleave_0"), val = tensor(false)]; tensor var_6970_cast_fp16 = concat(axis = var_6171, interleave = var_6970_interleave_0, values = (var_6876_cast_fp16, var_6878_cast_fp16, var_6880_cast_fp16, var_6882_cast_fp16, var_6884_cast_fp16, var_6886_cast_fp16))[name = tensor("op_6970_cast_fp16")]; tensor var_6972_interleave_0 = const()[name = tensor("op_6972_interleave_0"), val = tensor(false)]; tensor var_6972_cast_fp16 = concat(axis = var_6171, interleave = var_6972_interleave_0, values = (var_6888_cast_fp16, var_6890_cast_fp16, var_6892_cast_fp16, var_6894_cast_fp16, var_6896_cast_fp16, var_6898_cast_fp16))[name = tensor("op_6972_cast_fp16")]; tensor var_6974_interleave_0 = const()[name = tensor("op_6974_interleave_0"), val = tensor(false)]; tensor var_6974_cast_fp16 = concat(axis = var_6171, interleave = var_6974_interleave_0, values = (var_6900_cast_fp16, var_6902_cast_fp16, var_6904_cast_fp16, var_6906_cast_fp16, var_6908_cast_fp16, var_6910_cast_fp16))[name = tensor("op_6974_cast_fp16")]; tensor var_6976_interleave_0 = const()[name = tensor("op_6976_interleave_0"), val = tensor(false)]; tensor var_6976_cast_fp16 = concat(axis = var_6171, interleave = var_6976_interleave_0, values = (var_6912_cast_fp16, var_6914_cast_fp16, var_6916_cast_fp16, var_6918_cast_fp16, var_6920_cast_fp16, var_6922_cast_fp16))[name = tensor("op_6976_cast_fp16")]; tensor var_6978_interleave_0 = const()[name = tensor("op_6978_interleave_0"), val = tensor(false)]; tensor var_6978_cast_fp16 = concat(axis = var_6171, interleave = var_6978_interleave_0, values = (var_6924_cast_fp16, var_6926_cast_fp16, var_6928_cast_fp16, var_6930_cast_fp16, var_6932_cast_fp16, var_6934_cast_fp16))[name = tensor("op_6978_cast_fp16")]; tensor var_6980_interleave_0 = const()[name = tensor("op_6980_interleave_0"), val = tensor(false)]; tensor var_6980_cast_fp16 = concat(axis = var_6171, interleave = var_6980_interleave_0, values = (var_6936_cast_fp16, var_6938_cast_fp16, var_6940_cast_fp16, var_6942_cast_fp16, var_6944_cast_fp16, var_6946_cast_fp16))[name = tensor("op_6980_cast_fp16")]; tensor var_6982_interleave_0 = const()[name = tensor("op_6982_interleave_0"), val = tensor(false)]; tensor var_6982_cast_fp16 = concat(axis = var_6171, interleave = var_6982_interleave_0, values = (var_6948_cast_fp16, var_6950_cast_fp16, var_6952_cast_fp16, var_6954_cast_fp16, var_6956_cast_fp16, var_6958_cast_fp16))[name = tensor("op_6982_cast_fp16")]; tensor input_57_interleave_0 = const()[name = tensor("input_57_interleave_0"), val = tensor(false)]; tensor input_57_cast_fp16 = concat(axis = var_6187, interleave = input_57_interleave_0, values = (var_6960_cast_fp16, var_6962_cast_fp16, var_6964_cast_fp16, var_6966_cast_fp16, var_6968_cast_fp16, var_6970_cast_fp16, var_6972_cast_fp16, var_6974_cast_fp16, var_6976_cast_fp16, var_6978_cast_fp16, var_6980_cast_fp16, var_6982_cast_fp16))[name = tensor("input_57_cast_fp16")]; tensor obj_31_pad_type_0 = const()[name = tensor("obj_31_pad_type_0"), val = tensor("valid")]; tensor obj_31_strides_0 = const()[name = tensor("obj_31_strides_0"), val = tensor([1, 1])]; tensor obj_31_pad_0 = const()[name = tensor("obj_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_31_dilations_0 = const()[name = tensor("obj_31_dilations_0"), val = tensor([1, 1])]; tensor obj_31_groups_0 = const()[name = tensor("obj_31_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108989952)))]; tensor layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110169664)))]; tensor obj_31_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = obj_31_dilations_0, groups = obj_31_groups_0, pad = obj_31_pad_0, pad_type = obj_31_pad_type_0, strides = obj_31_strides_0, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("obj_31_cast_fp16")]; tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; tensor var_7001_to_fp16 = const()[name = tensor("op_7001_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_7001_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110171264)))]; tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110172864)))]; tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor input_61_pad_type_0 = const()[name = tensor("input_61_pad_type_0"), val = tensor("valid")]; tensor input_61_strides_0 = const()[name = tensor("input_61_strides_0"), val = tensor([1, 1])]; tensor input_61_pad_0 = const()[name = tensor("input_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_61_dilations_0 = const()[name = tensor("input_61_dilations_0"), val = tensor([1, 1])]; tensor input_61_groups_0 = const()[name = tensor("input_61_groups_0"), val = tensor(1)]; tensor layers_7_fc1_weight_to_fp16 = const()[name = tensor("layers_7_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110174464)))]; tensor layers_7_fc1_bias_to_fp16 = const()[name = tensor("layers_7_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114893120)))]; tensor input_61_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = input_61_dilations_0, groups = input_61_groups_0, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = input_61_strides_0, weight = layers_7_fc1_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("valid")]; tensor hidden_states_19_strides_0 = const()[name = tensor("hidden_states_19_strides_0"), val = tensor([1, 1])]; tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_19_dilations_0 = const()[name = tensor("hidden_states_19_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_19_groups_0 = const()[name = tensor("hidden_states_19_groups_0"), val = tensor(1)]; tensor layers_7_fc2_weight_to_fp16 = const()[name = tensor("layers_7_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114899328)))]; tensor layers_7_fc2_bias_to_fp16 = const()[name = tensor("layers_7_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119617984)))]; tensor hidden_states_19_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = hidden_states_19_dilations_0, groups = hidden_states_19_groups_0, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = hidden_states_19_strides_0, weight = layers_7_fc2_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; tensor var_7033 = const()[name = tensor("op_7033"), val = tensor(3)]; tensor var_7049 = const()[name = tensor("op_7049"), val = tensor(1)]; tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; tensor var_7066_to_fp16 = const()[name = tensor("op_7066_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_7066_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119619584)))]; tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119621184)))]; tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("valid")]; tensor query_17_strides_0 = const()[name = tensor("query_17_strides_0"), val = tensor([1, 1])]; tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_17_dilations_0 = const()[name = tensor("query_17_dilations_0"), val = tensor([1, 1])]; tensor query_17_groups_0 = const()[name = tensor("query_17_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119622784)))]; tensor layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120802496)))]; tensor query_17_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = query_17_dilations_0, groups = query_17_groups_0, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = query_17_strides_0, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("query_17_cast_fp16")]; tensor key_17_pad_type_0 = const()[name = tensor("key_17_pad_type_0"), val = tensor("valid")]; tensor key_17_strides_0 = const()[name = tensor("key_17_strides_0"), val = tensor([1, 1])]; tensor key_17_pad_0 = const()[name = tensor("key_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_17_dilations_0 = const()[name = tensor("key_17_dilations_0"), val = tensor([1, 1])]; tensor key_17_groups_0 = const()[name = tensor("key_17_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120804096)))]; tensor key_17_cast_fp16 = conv(dilations = key_17_dilations_0, groups = key_17_groups_0, pad = key_17_pad_0, pad_type = key_17_pad_type_0, strides = key_17_strides_0, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("key_17_cast_fp16")]; tensor value_17_pad_type_0 = const()[name = tensor("value_17_pad_type_0"), val = tensor("valid")]; tensor value_17_strides_0 = const()[name = tensor("value_17_strides_0"), val = tensor([1, 1])]; tensor value_17_pad_0 = const()[name = tensor("value_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_17_dilations_0 = const()[name = tensor("value_17_dilations_0"), val = tensor([1, 1])]; tensor value_17_groups_0 = const()[name = tensor("value_17_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121983808)))]; tensor layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123163520)))]; tensor value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = value_17_dilations_0, groups = value_17_groups_0, pad = value_17_pad_0, pad_type = value_17_pad_type_0, strides = value_17_strides_0, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("value_17_cast_fp16")]; tensor var_7101_begin_0 = const()[name = tensor("op_7101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7101_end_0 = const()[name = tensor("op_7101_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7101_end_mask_0 = const()[name = tensor("op_7101_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7101_cast_fp16 = slice_by_index(begin = var_7101_begin_0, end = var_7101_end_0, end_mask = var_7101_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7101_cast_fp16")]; tensor var_7105_begin_0 = const()[name = tensor("op_7105_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_7105_end_0 = const()[name = tensor("op_7105_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_7105_end_mask_0 = const()[name = tensor("op_7105_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7105_cast_fp16 = slice_by_index(begin = var_7105_begin_0, end = var_7105_end_0, end_mask = var_7105_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7105_cast_fp16")]; tensor var_7109_begin_0 = const()[name = tensor("op_7109_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_7109_end_0 = const()[name = tensor("op_7109_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_7109_end_mask_0 = const()[name = tensor("op_7109_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7109_cast_fp16 = slice_by_index(begin = var_7109_begin_0, end = var_7109_end_0, end_mask = var_7109_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7109_cast_fp16")]; tensor var_7113_begin_0 = const()[name = tensor("op_7113_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_7113_end_0 = const()[name = tensor("op_7113_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_7113_end_mask_0 = const()[name = tensor("op_7113_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7113_cast_fp16 = slice_by_index(begin = var_7113_begin_0, end = var_7113_end_0, end_mask = var_7113_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7113_cast_fp16")]; tensor var_7117_begin_0 = const()[name = tensor("op_7117_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_7117_end_0 = const()[name = tensor("op_7117_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_7117_end_mask_0 = const()[name = tensor("op_7117_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7117_cast_fp16 = slice_by_index(begin = var_7117_begin_0, end = var_7117_end_0, end_mask = var_7117_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7117_cast_fp16")]; tensor var_7121_begin_0 = const()[name = tensor("op_7121_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7121_end_0 = const()[name = tensor("op_7121_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_7121_end_mask_0 = const()[name = tensor("op_7121_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7121_cast_fp16 = slice_by_index(begin = var_7121_begin_0, end = var_7121_end_0, end_mask = var_7121_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7121_cast_fp16")]; tensor var_7125_begin_0 = const()[name = tensor("op_7125_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_7125_end_0 = const()[name = tensor("op_7125_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_7125_end_mask_0 = const()[name = tensor("op_7125_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7125_cast_fp16 = slice_by_index(begin = var_7125_begin_0, end = var_7125_end_0, end_mask = var_7125_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7125_cast_fp16")]; tensor var_7129_begin_0 = const()[name = tensor("op_7129_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_7129_end_0 = const()[name = tensor("op_7129_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_7129_end_mask_0 = const()[name = tensor("op_7129_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7129_cast_fp16 = slice_by_index(begin = var_7129_begin_0, end = var_7129_end_0, end_mask = var_7129_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7129_cast_fp16")]; tensor var_7133_begin_0 = const()[name = tensor("op_7133_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_7133_end_0 = const()[name = tensor("op_7133_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_7133_end_mask_0 = const()[name = tensor("op_7133_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7133_cast_fp16 = slice_by_index(begin = var_7133_begin_0, end = var_7133_end_0, end_mask = var_7133_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7133_cast_fp16")]; tensor var_7137_begin_0 = const()[name = tensor("op_7137_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_7137_end_0 = const()[name = tensor("op_7137_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_7137_end_mask_0 = const()[name = tensor("op_7137_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7137_cast_fp16 = slice_by_index(begin = var_7137_begin_0, end = var_7137_end_0, end_mask = var_7137_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7137_cast_fp16")]; tensor var_7141_begin_0 = const()[name = tensor("op_7141_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7141_end_0 = const()[name = tensor("op_7141_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_7141_end_mask_0 = const()[name = tensor("op_7141_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7141_cast_fp16 = slice_by_index(begin = var_7141_begin_0, end = var_7141_end_0, end_mask = var_7141_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7141_cast_fp16")]; tensor var_7145_begin_0 = const()[name = tensor("op_7145_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_7145_end_0 = const()[name = tensor("op_7145_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_7145_end_mask_0 = const()[name = tensor("op_7145_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7145_cast_fp16 = slice_by_index(begin = var_7145_begin_0, end = var_7145_end_0, end_mask = var_7145_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_7145_cast_fp16")]; tensor var_7148_begin_0 = const()[name = tensor("op_7148_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7148_end_0 = const()[name = tensor("op_7148_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7148_end_mask_0 = const()[name = tensor("op_7148_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7148_cast_fp16 = slice_by_index(begin = var_7148_begin_0, end = var_7148_end_0, end_mask = var_7148_end_mask_0, x = var_7101_cast_fp16)[name = tensor("op_7148_cast_fp16")]; tensor var_7149_begin_0 = const()[name = tensor("op_7149_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7149_end_0 = const()[name = tensor("op_7149_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7149_end_mask_0 = const()[name = tensor("op_7149_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7149_cast_fp16 = slice_by_index(begin = var_7149_begin_0, end = var_7149_end_0, end_mask = var_7149_end_mask_0, x = var_7101_cast_fp16)[name = tensor("op_7149_cast_fp16")]; tensor var_7150_begin_0 = const()[name = tensor("op_7150_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7150_end_0 = const()[name = tensor("op_7150_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7150_end_mask_0 = const()[name = tensor("op_7150_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7150_cast_fp16 = slice_by_index(begin = var_7150_begin_0, end = var_7150_end_0, end_mask = var_7150_end_mask_0, x = var_7101_cast_fp16)[name = tensor("op_7150_cast_fp16")]; tensor var_7151_begin_0 = const()[name = tensor("op_7151_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7151_end_0 = const()[name = tensor("op_7151_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7151_end_mask_0 = const()[name = tensor("op_7151_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7151_cast_fp16 = slice_by_index(begin = var_7151_begin_0, end = var_7151_end_0, end_mask = var_7151_end_mask_0, x = var_7101_cast_fp16)[name = tensor("op_7151_cast_fp16")]; tensor var_7152_begin_0 = const()[name = tensor("op_7152_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7152_end_0 = const()[name = tensor("op_7152_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7152_end_mask_0 = const()[name = tensor("op_7152_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7152_cast_fp16 = slice_by_index(begin = var_7152_begin_0, end = var_7152_end_0, end_mask = var_7152_end_mask_0, x = var_7101_cast_fp16)[name = tensor("op_7152_cast_fp16")]; tensor var_7153_begin_0 = const()[name = tensor("op_7153_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7153_end_0 = const()[name = tensor("op_7153_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7153_end_mask_0 = const()[name = tensor("op_7153_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7153_cast_fp16 = slice_by_index(begin = var_7153_begin_0, end = var_7153_end_0, end_mask = var_7153_end_mask_0, x = var_7101_cast_fp16)[name = tensor("op_7153_cast_fp16")]; tensor var_7154_begin_0 = const()[name = tensor("op_7154_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7154_end_0 = const()[name = tensor("op_7154_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7154_end_mask_0 = const()[name = tensor("op_7154_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7154_cast_fp16 = slice_by_index(begin = var_7154_begin_0, end = var_7154_end_0, end_mask = var_7154_end_mask_0, x = var_7105_cast_fp16)[name = tensor("op_7154_cast_fp16")]; tensor var_7155_begin_0 = const()[name = tensor("op_7155_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7155_end_0 = const()[name = tensor("op_7155_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7155_end_mask_0 = const()[name = tensor("op_7155_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7155_cast_fp16 = slice_by_index(begin = var_7155_begin_0, end = var_7155_end_0, end_mask = var_7155_end_mask_0, x = var_7105_cast_fp16)[name = tensor("op_7155_cast_fp16")]; tensor var_7156_begin_0 = const()[name = tensor("op_7156_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7156_end_0 = const()[name = tensor("op_7156_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7156_end_mask_0 = const()[name = tensor("op_7156_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7156_cast_fp16 = slice_by_index(begin = var_7156_begin_0, end = var_7156_end_0, end_mask = var_7156_end_mask_0, x = var_7105_cast_fp16)[name = tensor("op_7156_cast_fp16")]; tensor var_7157_begin_0 = const()[name = tensor("op_7157_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7157_end_0 = const()[name = tensor("op_7157_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7157_end_mask_0 = const()[name = tensor("op_7157_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7157_cast_fp16 = slice_by_index(begin = var_7157_begin_0, end = var_7157_end_0, end_mask = var_7157_end_mask_0, x = var_7105_cast_fp16)[name = tensor("op_7157_cast_fp16")]; tensor var_7158_begin_0 = const()[name = tensor("op_7158_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7158_end_0 = const()[name = tensor("op_7158_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7158_end_mask_0 = const()[name = tensor("op_7158_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7158_cast_fp16 = slice_by_index(begin = var_7158_begin_0, end = var_7158_end_0, end_mask = var_7158_end_mask_0, x = var_7105_cast_fp16)[name = tensor("op_7158_cast_fp16")]; tensor var_7159_begin_0 = const()[name = tensor("op_7159_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7159_end_0 = const()[name = tensor("op_7159_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7159_end_mask_0 = const()[name = tensor("op_7159_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7159_cast_fp16 = slice_by_index(begin = var_7159_begin_0, end = var_7159_end_0, end_mask = var_7159_end_mask_0, x = var_7105_cast_fp16)[name = tensor("op_7159_cast_fp16")]; tensor var_7160_begin_0 = const()[name = tensor("op_7160_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7160_end_0 = const()[name = tensor("op_7160_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7160_end_mask_0 = const()[name = tensor("op_7160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7160_cast_fp16 = slice_by_index(begin = var_7160_begin_0, end = var_7160_end_0, end_mask = var_7160_end_mask_0, x = var_7109_cast_fp16)[name = tensor("op_7160_cast_fp16")]; tensor var_7161_begin_0 = const()[name = tensor("op_7161_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7161_end_0 = const()[name = tensor("op_7161_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7161_end_mask_0 = const()[name = tensor("op_7161_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7161_cast_fp16 = slice_by_index(begin = var_7161_begin_0, end = var_7161_end_0, end_mask = var_7161_end_mask_0, x = var_7109_cast_fp16)[name = tensor("op_7161_cast_fp16")]; tensor var_7162_begin_0 = const()[name = tensor("op_7162_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7162_end_0 = const()[name = tensor("op_7162_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7162_end_mask_0 = const()[name = tensor("op_7162_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7162_cast_fp16 = slice_by_index(begin = var_7162_begin_0, end = var_7162_end_0, end_mask = var_7162_end_mask_0, x = var_7109_cast_fp16)[name = tensor("op_7162_cast_fp16")]; tensor var_7163_begin_0 = const()[name = tensor("op_7163_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7163_end_0 = const()[name = tensor("op_7163_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7163_end_mask_0 = const()[name = tensor("op_7163_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7163_cast_fp16 = slice_by_index(begin = var_7163_begin_0, end = var_7163_end_0, end_mask = var_7163_end_mask_0, x = var_7109_cast_fp16)[name = tensor("op_7163_cast_fp16")]; tensor var_7164_begin_0 = const()[name = tensor("op_7164_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7164_end_0 = const()[name = tensor("op_7164_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7164_end_mask_0 = const()[name = tensor("op_7164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7164_cast_fp16 = slice_by_index(begin = var_7164_begin_0, end = var_7164_end_0, end_mask = var_7164_end_mask_0, x = var_7109_cast_fp16)[name = tensor("op_7164_cast_fp16")]; tensor var_7165_begin_0 = const()[name = tensor("op_7165_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7165_end_0 = const()[name = tensor("op_7165_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7165_end_mask_0 = const()[name = tensor("op_7165_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7165_cast_fp16 = slice_by_index(begin = var_7165_begin_0, end = var_7165_end_0, end_mask = var_7165_end_mask_0, x = var_7109_cast_fp16)[name = tensor("op_7165_cast_fp16")]; tensor var_7166_begin_0 = const()[name = tensor("op_7166_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7166_end_0 = const()[name = tensor("op_7166_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7166_end_mask_0 = const()[name = tensor("op_7166_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7166_cast_fp16 = slice_by_index(begin = var_7166_begin_0, end = var_7166_end_0, end_mask = var_7166_end_mask_0, x = var_7113_cast_fp16)[name = tensor("op_7166_cast_fp16")]; tensor var_7167_begin_0 = const()[name = tensor("op_7167_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7167_end_0 = const()[name = tensor("op_7167_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7167_end_mask_0 = const()[name = tensor("op_7167_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7167_cast_fp16 = slice_by_index(begin = var_7167_begin_0, end = var_7167_end_0, end_mask = var_7167_end_mask_0, x = var_7113_cast_fp16)[name = tensor("op_7167_cast_fp16")]; tensor var_7168_begin_0 = const()[name = tensor("op_7168_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7168_end_0 = const()[name = tensor("op_7168_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7168_end_mask_0 = const()[name = tensor("op_7168_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7168_cast_fp16 = slice_by_index(begin = var_7168_begin_0, end = var_7168_end_0, end_mask = var_7168_end_mask_0, x = var_7113_cast_fp16)[name = tensor("op_7168_cast_fp16")]; tensor var_7169_begin_0 = const()[name = tensor("op_7169_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7169_end_0 = const()[name = tensor("op_7169_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7169_end_mask_0 = const()[name = tensor("op_7169_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7169_cast_fp16 = slice_by_index(begin = var_7169_begin_0, end = var_7169_end_0, end_mask = var_7169_end_mask_0, x = var_7113_cast_fp16)[name = tensor("op_7169_cast_fp16")]; tensor var_7170_begin_0 = const()[name = tensor("op_7170_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7170_end_0 = const()[name = tensor("op_7170_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7170_end_mask_0 = const()[name = tensor("op_7170_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7170_cast_fp16 = slice_by_index(begin = var_7170_begin_0, end = var_7170_end_0, end_mask = var_7170_end_mask_0, x = var_7113_cast_fp16)[name = tensor("op_7170_cast_fp16")]; tensor var_7171_begin_0 = const()[name = tensor("op_7171_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7171_end_0 = const()[name = tensor("op_7171_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7171_end_mask_0 = const()[name = tensor("op_7171_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7171_cast_fp16 = slice_by_index(begin = var_7171_begin_0, end = var_7171_end_0, end_mask = var_7171_end_mask_0, x = var_7113_cast_fp16)[name = tensor("op_7171_cast_fp16")]; tensor var_7172_begin_0 = const()[name = tensor("op_7172_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7172_end_0 = const()[name = tensor("op_7172_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7172_end_mask_0 = const()[name = tensor("op_7172_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7172_cast_fp16 = slice_by_index(begin = var_7172_begin_0, end = var_7172_end_0, end_mask = var_7172_end_mask_0, x = var_7117_cast_fp16)[name = tensor("op_7172_cast_fp16")]; tensor var_7173_begin_0 = const()[name = tensor("op_7173_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7173_end_0 = const()[name = tensor("op_7173_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7173_end_mask_0 = const()[name = tensor("op_7173_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7173_cast_fp16 = slice_by_index(begin = var_7173_begin_0, end = var_7173_end_0, end_mask = var_7173_end_mask_0, x = var_7117_cast_fp16)[name = tensor("op_7173_cast_fp16")]; tensor var_7174_begin_0 = const()[name = tensor("op_7174_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7174_end_0 = const()[name = tensor("op_7174_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7174_end_mask_0 = const()[name = tensor("op_7174_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7174_cast_fp16 = slice_by_index(begin = var_7174_begin_0, end = var_7174_end_0, end_mask = var_7174_end_mask_0, x = var_7117_cast_fp16)[name = tensor("op_7174_cast_fp16")]; tensor var_7175_begin_0 = const()[name = tensor("op_7175_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7175_end_0 = const()[name = tensor("op_7175_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7175_end_mask_0 = const()[name = tensor("op_7175_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7175_cast_fp16 = slice_by_index(begin = var_7175_begin_0, end = var_7175_end_0, end_mask = var_7175_end_mask_0, x = var_7117_cast_fp16)[name = tensor("op_7175_cast_fp16")]; tensor var_7176_begin_0 = const()[name = tensor("op_7176_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7176_end_0 = const()[name = tensor("op_7176_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7176_end_mask_0 = const()[name = tensor("op_7176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7176_cast_fp16 = slice_by_index(begin = var_7176_begin_0, end = var_7176_end_0, end_mask = var_7176_end_mask_0, x = var_7117_cast_fp16)[name = tensor("op_7176_cast_fp16")]; tensor var_7177_begin_0 = const()[name = tensor("op_7177_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7177_end_0 = const()[name = tensor("op_7177_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7177_end_mask_0 = const()[name = tensor("op_7177_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7177_cast_fp16 = slice_by_index(begin = var_7177_begin_0, end = var_7177_end_0, end_mask = var_7177_end_mask_0, x = var_7117_cast_fp16)[name = tensor("op_7177_cast_fp16")]; tensor var_7178_begin_0 = const()[name = tensor("op_7178_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7178_end_0 = const()[name = tensor("op_7178_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7178_end_mask_0 = const()[name = tensor("op_7178_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7178_cast_fp16 = slice_by_index(begin = var_7178_begin_0, end = var_7178_end_0, end_mask = var_7178_end_mask_0, x = var_7121_cast_fp16)[name = tensor("op_7178_cast_fp16")]; tensor var_7179_begin_0 = const()[name = tensor("op_7179_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7179_end_0 = const()[name = tensor("op_7179_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7179_end_mask_0 = const()[name = tensor("op_7179_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7179_cast_fp16 = slice_by_index(begin = var_7179_begin_0, end = var_7179_end_0, end_mask = var_7179_end_mask_0, x = var_7121_cast_fp16)[name = tensor("op_7179_cast_fp16")]; tensor var_7180_begin_0 = const()[name = tensor("op_7180_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7180_end_0 = const()[name = tensor("op_7180_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7180_end_mask_0 = const()[name = tensor("op_7180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7180_cast_fp16 = slice_by_index(begin = var_7180_begin_0, end = var_7180_end_0, end_mask = var_7180_end_mask_0, x = var_7121_cast_fp16)[name = tensor("op_7180_cast_fp16")]; tensor var_7181_begin_0 = const()[name = tensor("op_7181_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7181_end_0 = const()[name = tensor("op_7181_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7181_end_mask_0 = const()[name = tensor("op_7181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7181_cast_fp16 = slice_by_index(begin = var_7181_begin_0, end = var_7181_end_0, end_mask = var_7181_end_mask_0, x = var_7121_cast_fp16)[name = tensor("op_7181_cast_fp16")]; tensor var_7182_begin_0 = const()[name = tensor("op_7182_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7182_end_0 = const()[name = tensor("op_7182_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7182_end_mask_0 = const()[name = tensor("op_7182_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7182_cast_fp16 = slice_by_index(begin = var_7182_begin_0, end = var_7182_end_0, end_mask = var_7182_end_mask_0, x = var_7121_cast_fp16)[name = tensor("op_7182_cast_fp16")]; tensor var_7183_begin_0 = const()[name = tensor("op_7183_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7183_end_0 = const()[name = tensor("op_7183_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7183_end_mask_0 = const()[name = tensor("op_7183_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7183_cast_fp16 = slice_by_index(begin = var_7183_begin_0, end = var_7183_end_0, end_mask = var_7183_end_mask_0, x = var_7121_cast_fp16)[name = tensor("op_7183_cast_fp16")]; tensor var_7184_begin_0 = const()[name = tensor("op_7184_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7184_end_0 = const()[name = tensor("op_7184_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7184_end_mask_0 = const()[name = tensor("op_7184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7184_cast_fp16 = slice_by_index(begin = var_7184_begin_0, end = var_7184_end_0, end_mask = var_7184_end_mask_0, x = var_7125_cast_fp16)[name = tensor("op_7184_cast_fp16")]; tensor var_7185_begin_0 = const()[name = tensor("op_7185_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7185_end_0 = const()[name = tensor("op_7185_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7185_end_mask_0 = const()[name = tensor("op_7185_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7185_cast_fp16 = slice_by_index(begin = var_7185_begin_0, end = var_7185_end_0, end_mask = var_7185_end_mask_0, x = var_7125_cast_fp16)[name = tensor("op_7185_cast_fp16")]; tensor var_7186_begin_0 = const()[name = tensor("op_7186_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7186_end_0 = const()[name = tensor("op_7186_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7186_end_mask_0 = const()[name = tensor("op_7186_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7186_cast_fp16 = slice_by_index(begin = var_7186_begin_0, end = var_7186_end_0, end_mask = var_7186_end_mask_0, x = var_7125_cast_fp16)[name = tensor("op_7186_cast_fp16")]; tensor var_7187_begin_0 = const()[name = tensor("op_7187_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7187_end_0 = const()[name = tensor("op_7187_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7187_end_mask_0 = const()[name = tensor("op_7187_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7187_cast_fp16 = slice_by_index(begin = var_7187_begin_0, end = var_7187_end_0, end_mask = var_7187_end_mask_0, x = var_7125_cast_fp16)[name = tensor("op_7187_cast_fp16")]; tensor var_7188_begin_0 = const()[name = tensor("op_7188_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7188_end_0 = const()[name = tensor("op_7188_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7188_end_mask_0 = const()[name = tensor("op_7188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7188_cast_fp16 = slice_by_index(begin = var_7188_begin_0, end = var_7188_end_0, end_mask = var_7188_end_mask_0, x = var_7125_cast_fp16)[name = tensor("op_7188_cast_fp16")]; tensor var_7189_begin_0 = const()[name = tensor("op_7189_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7189_end_0 = const()[name = tensor("op_7189_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7189_end_mask_0 = const()[name = tensor("op_7189_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7189_cast_fp16 = slice_by_index(begin = var_7189_begin_0, end = var_7189_end_0, end_mask = var_7189_end_mask_0, x = var_7125_cast_fp16)[name = tensor("op_7189_cast_fp16")]; tensor var_7190_begin_0 = const()[name = tensor("op_7190_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7190_end_0 = const()[name = tensor("op_7190_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7190_end_mask_0 = const()[name = tensor("op_7190_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7190_cast_fp16 = slice_by_index(begin = var_7190_begin_0, end = var_7190_end_0, end_mask = var_7190_end_mask_0, x = var_7129_cast_fp16)[name = tensor("op_7190_cast_fp16")]; tensor var_7191_begin_0 = const()[name = tensor("op_7191_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7191_end_0 = const()[name = tensor("op_7191_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7191_end_mask_0 = const()[name = tensor("op_7191_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7191_cast_fp16 = slice_by_index(begin = var_7191_begin_0, end = var_7191_end_0, end_mask = var_7191_end_mask_0, x = var_7129_cast_fp16)[name = tensor("op_7191_cast_fp16")]; tensor var_7192_begin_0 = const()[name = tensor("op_7192_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7192_end_0 = const()[name = tensor("op_7192_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7192_end_mask_0 = const()[name = tensor("op_7192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7192_cast_fp16 = slice_by_index(begin = var_7192_begin_0, end = var_7192_end_0, end_mask = var_7192_end_mask_0, x = var_7129_cast_fp16)[name = tensor("op_7192_cast_fp16")]; tensor var_7193_begin_0 = const()[name = tensor("op_7193_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7193_end_0 = const()[name = tensor("op_7193_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7193_end_mask_0 = const()[name = tensor("op_7193_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7193_cast_fp16 = slice_by_index(begin = var_7193_begin_0, end = var_7193_end_0, end_mask = var_7193_end_mask_0, x = var_7129_cast_fp16)[name = tensor("op_7193_cast_fp16")]; tensor var_7194_begin_0 = const()[name = tensor("op_7194_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7194_end_0 = const()[name = tensor("op_7194_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7194_end_mask_0 = const()[name = tensor("op_7194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7194_cast_fp16 = slice_by_index(begin = var_7194_begin_0, end = var_7194_end_0, end_mask = var_7194_end_mask_0, x = var_7129_cast_fp16)[name = tensor("op_7194_cast_fp16")]; tensor var_7195_begin_0 = const()[name = tensor("op_7195_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7195_end_0 = const()[name = tensor("op_7195_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7195_end_mask_0 = const()[name = tensor("op_7195_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7195_cast_fp16 = slice_by_index(begin = var_7195_begin_0, end = var_7195_end_0, end_mask = var_7195_end_mask_0, x = var_7129_cast_fp16)[name = tensor("op_7195_cast_fp16")]; tensor var_7196_begin_0 = const()[name = tensor("op_7196_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7196_end_0 = const()[name = tensor("op_7196_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7196_end_mask_0 = const()[name = tensor("op_7196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7196_cast_fp16 = slice_by_index(begin = var_7196_begin_0, end = var_7196_end_0, end_mask = var_7196_end_mask_0, x = var_7133_cast_fp16)[name = tensor("op_7196_cast_fp16")]; tensor var_7197_begin_0 = const()[name = tensor("op_7197_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7197_end_0 = const()[name = tensor("op_7197_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7197_end_mask_0 = const()[name = tensor("op_7197_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7197_cast_fp16 = slice_by_index(begin = var_7197_begin_0, end = var_7197_end_0, end_mask = var_7197_end_mask_0, x = var_7133_cast_fp16)[name = tensor("op_7197_cast_fp16")]; tensor var_7198_begin_0 = const()[name = tensor("op_7198_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7198_end_0 = const()[name = tensor("op_7198_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7198_end_mask_0 = const()[name = tensor("op_7198_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7198_cast_fp16 = slice_by_index(begin = var_7198_begin_0, end = var_7198_end_0, end_mask = var_7198_end_mask_0, x = var_7133_cast_fp16)[name = tensor("op_7198_cast_fp16")]; tensor var_7199_begin_0 = const()[name = tensor("op_7199_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7199_end_0 = const()[name = tensor("op_7199_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7199_end_mask_0 = const()[name = tensor("op_7199_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7199_cast_fp16 = slice_by_index(begin = var_7199_begin_0, end = var_7199_end_0, end_mask = var_7199_end_mask_0, x = var_7133_cast_fp16)[name = tensor("op_7199_cast_fp16")]; tensor var_7200_begin_0 = const()[name = tensor("op_7200_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7200_end_0 = const()[name = tensor("op_7200_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7200_end_mask_0 = const()[name = tensor("op_7200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7200_cast_fp16 = slice_by_index(begin = var_7200_begin_0, end = var_7200_end_0, end_mask = var_7200_end_mask_0, x = var_7133_cast_fp16)[name = tensor("op_7200_cast_fp16")]; tensor var_7201_begin_0 = const()[name = tensor("op_7201_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7201_end_0 = const()[name = tensor("op_7201_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7201_end_mask_0 = const()[name = tensor("op_7201_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7201_cast_fp16 = slice_by_index(begin = var_7201_begin_0, end = var_7201_end_0, end_mask = var_7201_end_mask_0, x = var_7133_cast_fp16)[name = tensor("op_7201_cast_fp16")]; tensor var_7202_begin_0 = const()[name = tensor("op_7202_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7202_end_0 = const()[name = tensor("op_7202_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7202_end_mask_0 = const()[name = tensor("op_7202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7202_cast_fp16 = slice_by_index(begin = var_7202_begin_0, end = var_7202_end_0, end_mask = var_7202_end_mask_0, x = var_7137_cast_fp16)[name = tensor("op_7202_cast_fp16")]; tensor var_7203_begin_0 = const()[name = tensor("op_7203_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7203_end_0 = const()[name = tensor("op_7203_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7203_end_mask_0 = const()[name = tensor("op_7203_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7203_cast_fp16 = slice_by_index(begin = var_7203_begin_0, end = var_7203_end_0, end_mask = var_7203_end_mask_0, x = var_7137_cast_fp16)[name = tensor("op_7203_cast_fp16")]; tensor var_7204_begin_0 = const()[name = tensor("op_7204_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7204_end_0 = const()[name = tensor("op_7204_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7204_end_mask_0 = const()[name = tensor("op_7204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7204_cast_fp16 = slice_by_index(begin = var_7204_begin_0, end = var_7204_end_0, end_mask = var_7204_end_mask_0, x = var_7137_cast_fp16)[name = tensor("op_7204_cast_fp16")]; tensor var_7205_begin_0 = const()[name = tensor("op_7205_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7205_end_0 = const()[name = tensor("op_7205_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7205_end_mask_0 = const()[name = tensor("op_7205_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7205_cast_fp16 = slice_by_index(begin = var_7205_begin_0, end = var_7205_end_0, end_mask = var_7205_end_mask_0, x = var_7137_cast_fp16)[name = tensor("op_7205_cast_fp16")]; tensor var_7206_begin_0 = const()[name = tensor("op_7206_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7206_end_0 = const()[name = tensor("op_7206_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7206_end_mask_0 = const()[name = tensor("op_7206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7206_cast_fp16 = slice_by_index(begin = var_7206_begin_0, end = var_7206_end_0, end_mask = var_7206_end_mask_0, x = var_7137_cast_fp16)[name = tensor("op_7206_cast_fp16")]; tensor var_7207_begin_0 = const()[name = tensor("op_7207_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7207_end_0 = const()[name = tensor("op_7207_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7207_end_mask_0 = const()[name = tensor("op_7207_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7207_cast_fp16 = slice_by_index(begin = var_7207_begin_0, end = var_7207_end_0, end_mask = var_7207_end_mask_0, x = var_7137_cast_fp16)[name = tensor("op_7207_cast_fp16")]; tensor var_7208_begin_0 = const()[name = tensor("op_7208_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7208_end_0 = const()[name = tensor("op_7208_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7208_end_mask_0 = const()[name = tensor("op_7208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7208_cast_fp16 = slice_by_index(begin = var_7208_begin_0, end = var_7208_end_0, end_mask = var_7208_end_mask_0, x = var_7141_cast_fp16)[name = tensor("op_7208_cast_fp16")]; tensor var_7209_begin_0 = const()[name = tensor("op_7209_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7209_end_0 = const()[name = tensor("op_7209_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7209_end_mask_0 = const()[name = tensor("op_7209_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7209_cast_fp16 = slice_by_index(begin = var_7209_begin_0, end = var_7209_end_0, end_mask = var_7209_end_mask_0, x = var_7141_cast_fp16)[name = tensor("op_7209_cast_fp16")]; tensor var_7210_begin_0 = const()[name = tensor("op_7210_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7210_end_0 = const()[name = tensor("op_7210_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7210_end_mask_0 = const()[name = tensor("op_7210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7210_cast_fp16 = slice_by_index(begin = var_7210_begin_0, end = var_7210_end_0, end_mask = var_7210_end_mask_0, x = var_7141_cast_fp16)[name = tensor("op_7210_cast_fp16")]; tensor var_7211_begin_0 = const()[name = tensor("op_7211_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7211_end_0 = const()[name = tensor("op_7211_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7211_end_mask_0 = const()[name = tensor("op_7211_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7211_cast_fp16 = slice_by_index(begin = var_7211_begin_0, end = var_7211_end_0, end_mask = var_7211_end_mask_0, x = var_7141_cast_fp16)[name = tensor("op_7211_cast_fp16")]; tensor var_7212_begin_0 = const()[name = tensor("op_7212_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7212_end_0 = const()[name = tensor("op_7212_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7212_end_mask_0 = const()[name = tensor("op_7212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7212_cast_fp16 = slice_by_index(begin = var_7212_begin_0, end = var_7212_end_0, end_mask = var_7212_end_mask_0, x = var_7141_cast_fp16)[name = tensor("op_7212_cast_fp16")]; tensor var_7213_begin_0 = const()[name = tensor("op_7213_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7213_end_0 = const()[name = tensor("op_7213_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7213_end_mask_0 = const()[name = tensor("op_7213_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7213_cast_fp16 = slice_by_index(begin = var_7213_begin_0, end = var_7213_end_0, end_mask = var_7213_end_mask_0, x = var_7141_cast_fp16)[name = tensor("op_7213_cast_fp16")]; tensor var_7214_begin_0 = const()[name = tensor("op_7214_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7214_end_0 = const()[name = tensor("op_7214_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_7214_end_mask_0 = const()[name = tensor("op_7214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7214_cast_fp16 = slice_by_index(begin = var_7214_begin_0, end = var_7214_end_0, end_mask = var_7214_end_mask_0, x = var_7145_cast_fp16)[name = tensor("op_7214_cast_fp16")]; tensor var_7215_begin_0 = const()[name = tensor("op_7215_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7215_end_0 = const()[name = tensor("op_7215_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_7215_end_mask_0 = const()[name = tensor("op_7215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7215_cast_fp16 = slice_by_index(begin = var_7215_begin_0, end = var_7215_end_0, end_mask = var_7215_end_mask_0, x = var_7145_cast_fp16)[name = tensor("op_7215_cast_fp16")]; tensor var_7216_begin_0 = const()[name = tensor("op_7216_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7216_end_0 = const()[name = tensor("op_7216_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_7216_end_mask_0 = const()[name = tensor("op_7216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7216_cast_fp16 = slice_by_index(begin = var_7216_begin_0, end = var_7216_end_0, end_mask = var_7216_end_mask_0, x = var_7145_cast_fp16)[name = tensor("op_7216_cast_fp16")]; tensor var_7217_begin_0 = const()[name = tensor("op_7217_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7217_end_0 = const()[name = tensor("op_7217_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_7217_end_mask_0 = const()[name = tensor("op_7217_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7217_cast_fp16 = slice_by_index(begin = var_7217_begin_0, end = var_7217_end_0, end_mask = var_7217_end_mask_0, x = var_7145_cast_fp16)[name = tensor("op_7217_cast_fp16")]; tensor var_7218_begin_0 = const()[name = tensor("op_7218_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7218_end_0 = const()[name = tensor("op_7218_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_7218_end_mask_0 = const()[name = tensor("op_7218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7218_cast_fp16 = slice_by_index(begin = var_7218_begin_0, end = var_7218_end_0, end_mask = var_7218_end_mask_0, x = var_7145_cast_fp16)[name = tensor("op_7218_cast_fp16")]; tensor var_7219_begin_0 = const()[name = tensor("op_7219_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_7219_end_0 = const()[name = tensor("op_7219_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_7219_end_mask_0 = const()[name = tensor("op_7219_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7219_cast_fp16 = slice_by_index(begin = var_7219_begin_0, end = var_7219_end_0, end_mask = var_7219_end_mask_0, x = var_7145_cast_fp16)[name = tensor("op_7219_cast_fp16")]; tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_7224_begin_0 = const()[name = tensor("op_7224_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7224_end_0 = const()[name = tensor("op_7224_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_7224_end_mask_0 = const()[name = tensor("op_7224_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = key_17_cast_fp16)[name = tensor("transpose_3")]; tensor var_7224_cast_fp16 = slice_by_index(begin = var_7224_begin_0, end = var_7224_end_0, end_mask = var_7224_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7224_cast_fp16")]; tensor var_7228_begin_0 = const()[name = tensor("op_7228_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_7228_end_0 = const()[name = tensor("op_7228_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_7228_end_mask_0 = const()[name = tensor("op_7228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7228_cast_fp16 = slice_by_index(begin = var_7228_begin_0, end = var_7228_end_0, end_mask = var_7228_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7228_cast_fp16")]; tensor var_7232_begin_0 = const()[name = tensor("op_7232_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_7232_end_0 = const()[name = tensor("op_7232_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_7232_end_mask_0 = const()[name = tensor("op_7232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7232_cast_fp16 = slice_by_index(begin = var_7232_begin_0, end = var_7232_end_0, end_mask = var_7232_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7232_cast_fp16")]; tensor var_7236_begin_0 = const()[name = tensor("op_7236_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_7236_end_0 = const()[name = tensor("op_7236_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_7236_end_mask_0 = const()[name = tensor("op_7236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7236_cast_fp16 = slice_by_index(begin = var_7236_begin_0, end = var_7236_end_0, end_mask = var_7236_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7236_cast_fp16")]; tensor var_7240_begin_0 = const()[name = tensor("op_7240_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7240_end_0 = const()[name = tensor("op_7240_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_7240_end_mask_0 = const()[name = tensor("op_7240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7240_cast_fp16 = slice_by_index(begin = var_7240_begin_0, end = var_7240_end_0, end_mask = var_7240_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7240_cast_fp16")]; tensor var_7244_begin_0 = const()[name = tensor("op_7244_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_7244_end_0 = const()[name = tensor("op_7244_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_7244_end_mask_0 = const()[name = tensor("op_7244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7244_cast_fp16 = slice_by_index(begin = var_7244_begin_0, end = var_7244_end_0, end_mask = var_7244_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7244_cast_fp16")]; tensor var_7248_begin_0 = const()[name = tensor("op_7248_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_7248_end_0 = const()[name = tensor("op_7248_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_7248_end_mask_0 = const()[name = tensor("op_7248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7248_cast_fp16 = slice_by_index(begin = var_7248_begin_0, end = var_7248_end_0, end_mask = var_7248_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7248_cast_fp16")]; tensor var_7252_begin_0 = const()[name = tensor("op_7252_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_7252_end_0 = const()[name = tensor("op_7252_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_7252_end_mask_0 = const()[name = tensor("op_7252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7252_cast_fp16 = slice_by_index(begin = var_7252_begin_0, end = var_7252_end_0, end_mask = var_7252_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7252_cast_fp16")]; tensor var_7256_begin_0 = const()[name = tensor("op_7256_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7256_end_0 = const()[name = tensor("op_7256_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_7256_end_mask_0 = const()[name = tensor("op_7256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7256_cast_fp16 = slice_by_index(begin = var_7256_begin_0, end = var_7256_end_0, end_mask = var_7256_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7256_cast_fp16")]; tensor var_7260_begin_0 = const()[name = tensor("op_7260_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_7260_end_0 = const()[name = tensor("op_7260_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_7260_end_mask_0 = const()[name = tensor("op_7260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7260_cast_fp16 = slice_by_index(begin = var_7260_begin_0, end = var_7260_end_0, end_mask = var_7260_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7260_cast_fp16")]; tensor var_7264_begin_0 = const()[name = tensor("op_7264_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_7264_end_0 = const()[name = tensor("op_7264_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_7264_end_mask_0 = const()[name = tensor("op_7264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7264_cast_fp16 = slice_by_index(begin = var_7264_begin_0, end = var_7264_end_0, end_mask = var_7264_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7264_cast_fp16")]; tensor var_7268_begin_0 = const()[name = tensor("op_7268_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_7268_end_0 = const()[name = tensor("op_7268_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_7268_end_mask_0 = const()[name = tensor("op_7268_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7268_cast_fp16 = slice_by_index(begin = var_7268_begin_0, end = var_7268_end_0, end_mask = var_7268_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_7268_cast_fp16")]; tensor var_7270_begin_0 = const()[name = tensor("op_7270_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7270_end_0 = const()[name = tensor("op_7270_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7270_end_mask_0 = const()[name = tensor("op_7270_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7270_cast_fp16 = slice_by_index(begin = var_7270_begin_0, end = var_7270_end_0, end_mask = var_7270_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7270_cast_fp16")]; tensor var_7274_begin_0 = const()[name = tensor("op_7274_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_7274_end_0 = const()[name = tensor("op_7274_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_7274_end_mask_0 = const()[name = tensor("op_7274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7274_cast_fp16 = slice_by_index(begin = var_7274_begin_0, end = var_7274_end_0, end_mask = var_7274_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7274_cast_fp16")]; tensor var_7278_begin_0 = const()[name = tensor("op_7278_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_7278_end_0 = const()[name = tensor("op_7278_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_7278_end_mask_0 = const()[name = tensor("op_7278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7278_cast_fp16 = slice_by_index(begin = var_7278_begin_0, end = var_7278_end_0, end_mask = var_7278_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7278_cast_fp16")]; tensor var_7282_begin_0 = const()[name = tensor("op_7282_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_7282_end_0 = const()[name = tensor("op_7282_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_7282_end_mask_0 = const()[name = tensor("op_7282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7282_cast_fp16 = slice_by_index(begin = var_7282_begin_0, end = var_7282_end_0, end_mask = var_7282_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7282_cast_fp16")]; tensor var_7286_begin_0 = const()[name = tensor("op_7286_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_7286_end_0 = const()[name = tensor("op_7286_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_7286_end_mask_0 = const()[name = tensor("op_7286_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7286_cast_fp16 = slice_by_index(begin = var_7286_begin_0, end = var_7286_end_0, end_mask = var_7286_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7286_cast_fp16")]; tensor var_7290_begin_0 = const()[name = tensor("op_7290_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7290_end_0 = const()[name = tensor("op_7290_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_7290_end_mask_0 = const()[name = tensor("op_7290_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7290_cast_fp16 = slice_by_index(begin = var_7290_begin_0, end = var_7290_end_0, end_mask = var_7290_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7290_cast_fp16")]; tensor var_7294_begin_0 = const()[name = tensor("op_7294_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_7294_end_0 = const()[name = tensor("op_7294_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_7294_end_mask_0 = const()[name = tensor("op_7294_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7294_cast_fp16 = slice_by_index(begin = var_7294_begin_0, end = var_7294_end_0, end_mask = var_7294_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7294_cast_fp16")]; tensor var_7298_begin_0 = const()[name = tensor("op_7298_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_7298_end_0 = const()[name = tensor("op_7298_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_7298_end_mask_0 = const()[name = tensor("op_7298_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7298_cast_fp16 = slice_by_index(begin = var_7298_begin_0, end = var_7298_end_0, end_mask = var_7298_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7298_cast_fp16")]; tensor var_7302_begin_0 = const()[name = tensor("op_7302_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_7302_end_0 = const()[name = tensor("op_7302_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_7302_end_mask_0 = const()[name = tensor("op_7302_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7302_cast_fp16 = slice_by_index(begin = var_7302_begin_0, end = var_7302_end_0, end_mask = var_7302_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7302_cast_fp16")]; tensor var_7306_begin_0 = const()[name = tensor("op_7306_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_7306_end_0 = const()[name = tensor("op_7306_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_7306_end_mask_0 = const()[name = tensor("op_7306_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7306_cast_fp16 = slice_by_index(begin = var_7306_begin_0, end = var_7306_end_0, end_mask = var_7306_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7306_cast_fp16")]; tensor var_7310_begin_0 = const()[name = tensor("op_7310_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7310_end_0 = const()[name = tensor("op_7310_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_7310_end_mask_0 = const()[name = tensor("op_7310_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7310_cast_fp16 = slice_by_index(begin = var_7310_begin_0, end = var_7310_end_0, end_mask = var_7310_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7310_cast_fp16")]; tensor var_7314_begin_0 = const()[name = tensor("op_7314_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_7314_end_0 = const()[name = tensor("op_7314_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_7314_end_mask_0 = const()[name = tensor("op_7314_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_7314_cast_fp16 = slice_by_index(begin = var_7314_begin_0, end = var_7314_end_0, end_mask = var_7314_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_7314_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1153_equation_0, values = (var_7224_cast_fp16, var_7148_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1155_equation_0, values = (var_7224_cast_fp16, var_7149_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1157_equation_0, values = (var_7224_cast_fp16, var_7150_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1159_equation_0, values = (var_7224_cast_fp16, var_7151_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1161_equation_0, values = (var_7224_cast_fp16, var_7152_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1163_equation_0, values = (var_7224_cast_fp16, var_7153_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1165_equation_0, values = (var_7228_cast_fp16, var_7154_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1167_equation_0, values = (var_7228_cast_fp16, var_7155_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1169_equation_0, values = (var_7228_cast_fp16, var_7156_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1171_equation_0, values = (var_7228_cast_fp16, var_7157_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1173_equation_0, values = (var_7228_cast_fp16, var_7158_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1175_equation_0, values = (var_7228_cast_fp16, var_7159_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1177_equation_0, values = (var_7232_cast_fp16, var_7160_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1179_equation_0, values = (var_7232_cast_fp16, var_7161_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1181_equation_0, values = (var_7232_cast_fp16, var_7162_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1183_equation_0, values = (var_7232_cast_fp16, var_7163_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1185_equation_0, values = (var_7232_cast_fp16, var_7164_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1187_equation_0, values = (var_7232_cast_fp16, var_7165_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1189_equation_0, values = (var_7236_cast_fp16, var_7166_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1191_equation_0, values = (var_7236_cast_fp16, var_7167_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1193_equation_0, values = (var_7236_cast_fp16, var_7168_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1195_equation_0, values = (var_7236_cast_fp16, var_7169_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1197_equation_0, values = (var_7236_cast_fp16, var_7170_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1199_equation_0, values = (var_7236_cast_fp16, var_7171_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1201_equation_0, values = (var_7240_cast_fp16, var_7172_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1203_equation_0, values = (var_7240_cast_fp16, var_7173_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1205_equation_0, values = (var_7240_cast_fp16, var_7174_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1207_equation_0, values = (var_7240_cast_fp16, var_7175_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1209_equation_0, values = (var_7240_cast_fp16, var_7176_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1211_equation_0, values = (var_7240_cast_fp16, var_7177_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1213_equation_0, values = (var_7244_cast_fp16, var_7178_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1215_equation_0, values = (var_7244_cast_fp16, var_7179_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1217_equation_0, values = (var_7244_cast_fp16, var_7180_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1219_equation_0, values = (var_7244_cast_fp16, var_7181_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1221_equation_0, values = (var_7244_cast_fp16, var_7182_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1223_equation_0, values = (var_7244_cast_fp16, var_7183_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1225_equation_0, values = (var_7248_cast_fp16, var_7184_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1227_equation_0, values = (var_7248_cast_fp16, var_7185_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1229_equation_0, values = (var_7248_cast_fp16, var_7186_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1231_equation_0, values = (var_7248_cast_fp16, var_7187_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1233_equation_0, values = (var_7248_cast_fp16, var_7188_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1235_equation_0, values = (var_7248_cast_fp16, var_7189_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1237_equation_0, values = (var_7252_cast_fp16, var_7190_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1239_equation_0, values = (var_7252_cast_fp16, var_7191_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1241_equation_0, values = (var_7252_cast_fp16, var_7192_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1243_equation_0, values = (var_7252_cast_fp16, var_7193_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1245_equation_0, values = (var_7252_cast_fp16, var_7194_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1247_equation_0, values = (var_7252_cast_fp16, var_7195_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1249_equation_0, values = (var_7256_cast_fp16, var_7196_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1251_equation_0, values = (var_7256_cast_fp16, var_7197_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1253_equation_0, values = (var_7256_cast_fp16, var_7198_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1255_equation_0, values = (var_7256_cast_fp16, var_7199_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1257_equation_0, values = (var_7256_cast_fp16, var_7200_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1259_equation_0, values = (var_7256_cast_fp16, var_7201_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1261_equation_0, values = (var_7260_cast_fp16, var_7202_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1263_equation_0, values = (var_7260_cast_fp16, var_7203_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1265_equation_0, values = (var_7260_cast_fp16, var_7204_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1267_equation_0, values = (var_7260_cast_fp16, var_7205_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1269_equation_0, values = (var_7260_cast_fp16, var_7206_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1271_equation_0, values = (var_7260_cast_fp16, var_7207_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1273_equation_0, values = (var_7264_cast_fp16, var_7208_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1275_equation_0, values = (var_7264_cast_fp16, var_7209_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1277_equation_0, values = (var_7264_cast_fp16, var_7210_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1279_equation_0, values = (var_7264_cast_fp16, var_7211_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1281_equation_0, values = (var_7264_cast_fp16, var_7212_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1283_equation_0, values = (var_7264_cast_fp16, var_7213_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1285_equation_0, values = (var_7268_cast_fp16, var_7214_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1287_equation_0, values = (var_7268_cast_fp16, var_7215_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1287_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1289_equation_0, values = (var_7268_cast_fp16, var_7216_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1291_equation_0, values = (var_7268_cast_fp16, var_7217_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1293_equation_0, values = (var_7268_cast_fp16, var_7218_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1295_equation_0, values = (var_7268_cast_fp16, var_7219_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1295_cast_fp16")]; tensor var_7461_to_fp16 = const()[name = tensor("op_7461_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1153_cast_fp16, y = var_7461_to_fp16)[name = tensor("aw_chunk_1153_cast_fp16")]; tensor var_7463_to_fp16 = const()[name = tensor("op_7463_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1155_cast_fp16, y = var_7463_to_fp16)[name = tensor("aw_chunk_1155_cast_fp16")]; tensor var_7465_to_fp16 = const()[name = tensor("op_7465_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1157_cast_fp16, y = var_7465_to_fp16)[name = tensor("aw_chunk_1157_cast_fp16")]; tensor var_7467_to_fp16 = const()[name = tensor("op_7467_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1159_cast_fp16, y = var_7467_to_fp16)[name = tensor("aw_chunk_1159_cast_fp16")]; tensor var_7469_to_fp16 = const()[name = tensor("op_7469_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1161_cast_fp16, y = var_7469_to_fp16)[name = tensor("aw_chunk_1161_cast_fp16")]; tensor var_7471_to_fp16 = const()[name = tensor("op_7471_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1163_cast_fp16, y = var_7471_to_fp16)[name = tensor("aw_chunk_1163_cast_fp16")]; tensor var_7473_to_fp16 = const()[name = tensor("op_7473_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1165_cast_fp16, y = var_7473_to_fp16)[name = tensor("aw_chunk_1165_cast_fp16")]; tensor var_7475_to_fp16 = const()[name = tensor("op_7475_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1167_cast_fp16, y = var_7475_to_fp16)[name = tensor("aw_chunk_1167_cast_fp16")]; tensor var_7477_to_fp16 = const()[name = tensor("op_7477_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1169_cast_fp16, y = var_7477_to_fp16)[name = tensor("aw_chunk_1169_cast_fp16")]; tensor var_7479_to_fp16 = const()[name = tensor("op_7479_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1171_cast_fp16, y = var_7479_to_fp16)[name = tensor("aw_chunk_1171_cast_fp16")]; tensor var_7481_to_fp16 = const()[name = tensor("op_7481_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1173_cast_fp16, y = var_7481_to_fp16)[name = tensor("aw_chunk_1173_cast_fp16")]; tensor var_7483_to_fp16 = const()[name = tensor("op_7483_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1175_cast_fp16, y = var_7483_to_fp16)[name = tensor("aw_chunk_1175_cast_fp16")]; tensor var_7485_to_fp16 = const()[name = tensor("op_7485_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1177_cast_fp16, y = var_7485_to_fp16)[name = tensor("aw_chunk_1177_cast_fp16")]; tensor var_7487_to_fp16 = const()[name = tensor("op_7487_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1179_cast_fp16, y = var_7487_to_fp16)[name = tensor("aw_chunk_1179_cast_fp16")]; tensor var_7489_to_fp16 = const()[name = tensor("op_7489_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1181_cast_fp16, y = var_7489_to_fp16)[name = tensor("aw_chunk_1181_cast_fp16")]; tensor var_7491_to_fp16 = const()[name = tensor("op_7491_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1183_cast_fp16, y = var_7491_to_fp16)[name = tensor("aw_chunk_1183_cast_fp16")]; tensor var_7493_to_fp16 = const()[name = tensor("op_7493_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1185_cast_fp16, y = var_7493_to_fp16)[name = tensor("aw_chunk_1185_cast_fp16")]; tensor var_7495_to_fp16 = const()[name = tensor("op_7495_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1187_cast_fp16, y = var_7495_to_fp16)[name = tensor("aw_chunk_1187_cast_fp16")]; tensor var_7497_to_fp16 = const()[name = tensor("op_7497_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1189_cast_fp16, y = var_7497_to_fp16)[name = tensor("aw_chunk_1189_cast_fp16")]; tensor var_7499_to_fp16 = const()[name = tensor("op_7499_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1191_cast_fp16, y = var_7499_to_fp16)[name = tensor("aw_chunk_1191_cast_fp16")]; tensor var_7501_to_fp16 = const()[name = tensor("op_7501_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1193_cast_fp16, y = var_7501_to_fp16)[name = tensor("aw_chunk_1193_cast_fp16")]; tensor var_7503_to_fp16 = const()[name = tensor("op_7503_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1195_cast_fp16, y = var_7503_to_fp16)[name = tensor("aw_chunk_1195_cast_fp16")]; tensor var_7505_to_fp16 = const()[name = tensor("op_7505_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1197_cast_fp16, y = var_7505_to_fp16)[name = tensor("aw_chunk_1197_cast_fp16")]; tensor var_7507_to_fp16 = const()[name = tensor("op_7507_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1199_cast_fp16, y = var_7507_to_fp16)[name = tensor("aw_chunk_1199_cast_fp16")]; tensor var_7509_to_fp16 = const()[name = tensor("op_7509_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1201_cast_fp16, y = var_7509_to_fp16)[name = tensor("aw_chunk_1201_cast_fp16")]; tensor var_7511_to_fp16 = const()[name = tensor("op_7511_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1203_cast_fp16, y = var_7511_to_fp16)[name = tensor("aw_chunk_1203_cast_fp16")]; tensor var_7513_to_fp16 = const()[name = tensor("op_7513_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1205_cast_fp16, y = var_7513_to_fp16)[name = tensor("aw_chunk_1205_cast_fp16")]; tensor var_7515_to_fp16 = const()[name = tensor("op_7515_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1207_cast_fp16, y = var_7515_to_fp16)[name = tensor("aw_chunk_1207_cast_fp16")]; tensor var_7517_to_fp16 = const()[name = tensor("op_7517_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1209_cast_fp16, y = var_7517_to_fp16)[name = tensor("aw_chunk_1209_cast_fp16")]; tensor var_7519_to_fp16 = const()[name = tensor("op_7519_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1211_cast_fp16, y = var_7519_to_fp16)[name = tensor("aw_chunk_1211_cast_fp16")]; tensor var_7521_to_fp16 = const()[name = tensor("op_7521_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1213_cast_fp16, y = var_7521_to_fp16)[name = tensor("aw_chunk_1213_cast_fp16")]; tensor var_7523_to_fp16 = const()[name = tensor("op_7523_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1215_cast_fp16, y = var_7523_to_fp16)[name = tensor("aw_chunk_1215_cast_fp16")]; tensor var_7525_to_fp16 = const()[name = tensor("op_7525_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1217_cast_fp16, y = var_7525_to_fp16)[name = tensor("aw_chunk_1217_cast_fp16")]; tensor var_7527_to_fp16 = const()[name = tensor("op_7527_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1219_cast_fp16, y = var_7527_to_fp16)[name = tensor("aw_chunk_1219_cast_fp16")]; tensor var_7529_to_fp16 = const()[name = tensor("op_7529_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1221_cast_fp16, y = var_7529_to_fp16)[name = tensor("aw_chunk_1221_cast_fp16")]; tensor var_7531_to_fp16 = const()[name = tensor("op_7531_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1223_cast_fp16, y = var_7531_to_fp16)[name = tensor("aw_chunk_1223_cast_fp16")]; tensor var_7533_to_fp16 = const()[name = tensor("op_7533_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1225_cast_fp16, y = var_7533_to_fp16)[name = tensor("aw_chunk_1225_cast_fp16")]; tensor var_7535_to_fp16 = const()[name = tensor("op_7535_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1227_cast_fp16, y = var_7535_to_fp16)[name = tensor("aw_chunk_1227_cast_fp16")]; tensor var_7537_to_fp16 = const()[name = tensor("op_7537_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1229_cast_fp16, y = var_7537_to_fp16)[name = tensor("aw_chunk_1229_cast_fp16")]; tensor var_7539_to_fp16 = const()[name = tensor("op_7539_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1231_cast_fp16, y = var_7539_to_fp16)[name = tensor("aw_chunk_1231_cast_fp16")]; tensor var_7541_to_fp16 = const()[name = tensor("op_7541_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1233_cast_fp16, y = var_7541_to_fp16)[name = tensor("aw_chunk_1233_cast_fp16")]; tensor var_7543_to_fp16 = const()[name = tensor("op_7543_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1235_cast_fp16, y = var_7543_to_fp16)[name = tensor("aw_chunk_1235_cast_fp16")]; tensor var_7545_to_fp16 = const()[name = tensor("op_7545_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1237_cast_fp16, y = var_7545_to_fp16)[name = tensor("aw_chunk_1237_cast_fp16")]; tensor var_7547_to_fp16 = const()[name = tensor("op_7547_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1239_cast_fp16, y = var_7547_to_fp16)[name = tensor("aw_chunk_1239_cast_fp16")]; tensor var_7549_to_fp16 = const()[name = tensor("op_7549_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1241_cast_fp16, y = var_7549_to_fp16)[name = tensor("aw_chunk_1241_cast_fp16")]; tensor var_7551_to_fp16 = const()[name = tensor("op_7551_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1243_cast_fp16, y = var_7551_to_fp16)[name = tensor("aw_chunk_1243_cast_fp16")]; tensor var_7553_to_fp16 = const()[name = tensor("op_7553_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1245_cast_fp16, y = var_7553_to_fp16)[name = tensor("aw_chunk_1245_cast_fp16")]; tensor var_7555_to_fp16 = const()[name = tensor("op_7555_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1247_cast_fp16, y = var_7555_to_fp16)[name = tensor("aw_chunk_1247_cast_fp16")]; tensor var_7557_to_fp16 = const()[name = tensor("op_7557_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1249_cast_fp16, y = var_7557_to_fp16)[name = tensor("aw_chunk_1249_cast_fp16")]; tensor var_7559_to_fp16 = const()[name = tensor("op_7559_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1251_cast_fp16, y = var_7559_to_fp16)[name = tensor("aw_chunk_1251_cast_fp16")]; tensor var_7561_to_fp16 = const()[name = tensor("op_7561_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1253_cast_fp16, y = var_7561_to_fp16)[name = tensor("aw_chunk_1253_cast_fp16")]; tensor var_7563_to_fp16 = const()[name = tensor("op_7563_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1255_cast_fp16, y = var_7563_to_fp16)[name = tensor("aw_chunk_1255_cast_fp16")]; tensor var_7565_to_fp16 = const()[name = tensor("op_7565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1257_cast_fp16, y = var_7565_to_fp16)[name = tensor("aw_chunk_1257_cast_fp16")]; tensor var_7567_to_fp16 = const()[name = tensor("op_7567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1259_cast_fp16, y = var_7567_to_fp16)[name = tensor("aw_chunk_1259_cast_fp16")]; tensor var_7569_to_fp16 = const()[name = tensor("op_7569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1261_cast_fp16, y = var_7569_to_fp16)[name = tensor("aw_chunk_1261_cast_fp16")]; tensor var_7571_to_fp16 = const()[name = tensor("op_7571_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1263_cast_fp16, y = var_7571_to_fp16)[name = tensor("aw_chunk_1263_cast_fp16")]; tensor var_7573_to_fp16 = const()[name = tensor("op_7573_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1265_cast_fp16, y = var_7573_to_fp16)[name = tensor("aw_chunk_1265_cast_fp16")]; tensor var_7575_to_fp16 = const()[name = tensor("op_7575_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1267_cast_fp16, y = var_7575_to_fp16)[name = tensor("aw_chunk_1267_cast_fp16")]; tensor var_7577_to_fp16 = const()[name = tensor("op_7577_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1269_cast_fp16, y = var_7577_to_fp16)[name = tensor("aw_chunk_1269_cast_fp16")]; tensor var_7579_to_fp16 = const()[name = tensor("op_7579_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1271_cast_fp16, y = var_7579_to_fp16)[name = tensor("aw_chunk_1271_cast_fp16")]; tensor var_7581_to_fp16 = const()[name = tensor("op_7581_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1273_cast_fp16, y = var_7581_to_fp16)[name = tensor("aw_chunk_1273_cast_fp16")]; tensor var_7583_to_fp16 = const()[name = tensor("op_7583_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1275_cast_fp16, y = var_7583_to_fp16)[name = tensor("aw_chunk_1275_cast_fp16")]; tensor var_7585_to_fp16 = const()[name = tensor("op_7585_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1277_cast_fp16, y = var_7585_to_fp16)[name = tensor("aw_chunk_1277_cast_fp16")]; tensor var_7587_to_fp16 = const()[name = tensor("op_7587_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1279_cast_fp16, y = var_7587_to_fp16)[name = tensor("aw_chunk_1279_cast_fp16")]; tensor var_7589_to_fp16 = const()[name = tensor("op_7589_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1281_cast_fp16, y = var_7589_to_fp16)[name = tensor("aw_chunk_1281_cast_fp16")]; tensor var_7591_to_fp16 = const()[name = tensor("op_7591_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1283_cast_fp16, y = var_7591_to_fp16)[name = tensor("aw_chunk_1283_cast_fp16")]; tensor var_7593_to_fp16 = const()[name = tensor("op_7593_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1285_cast_fp16, y = var_7593_to_fp16)[name = tensor("aw_chunk_1285_cast_fp16")]; tensor var_7595_to_fp16 = const()[name = tensor("op_7595_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1287_cast_fp16, y = var_7595_to_fp16)[name = tensor("aw_chunk_1287_cast_fp16")]; tensor var_7597_to_fp16 = const()[name = tensor("op_7597_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1289_cast_fp16, y = var_7597_to_fp16)[name = tensor("aw_chunk_1289_cast_fp16")]; tensor var_7599_to_fp16 = const()[name = tensor("op_7599_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1291_cast_fp16, y = var_7599_to_fp16)[name = tensor("aw_chunk_1291_cast_fp16")]; tensor var_7601_to_fp16 = const()[name = tensor("op_7601_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1293_cast_fp16, y = var_7601_to_fp16)[name = tensor("aw_chunk_1293_cast_fp16")]; tensor var_7603_to_fp16 = const()[name = tensor("op_7603_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1295_cast_fp16, y = var_7603_to_fp16)[name = tensor("aw_chunk_1295_cast_fp16")]; tensor var_7605_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1153_cast_fp16)[name = tensor("op_7605_cast_fp16")]; tensor var_7606_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1155_cast_fp16)[name = tensor("op_7606_cast_fp16")]; tensor var_7607_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1157_cast_fp16)[name = tensor("op_7607_cast_fp16")]; tensor var_7608_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1159_cast_fp16)[name = tensor("op_7608_cast_fp16")]; tensor var_7609_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1161_cast_fp16)[name = tensor("op_7609_cast_fp16")]; tensor var_7610_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1163_cast_fp16)[name = tensor("op_7610_cast_fp16")]; tensor var_7611_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1165_cast_fp16)[name = tensor("op_7611_cast_fp16")]; tensor var_7612_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1167_cast_fp16)[name = tensor("op_7612_cast_fp16")]; tensor var_7613_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1169_cast_fp16)[name = tensor("op_7613_cast_fp16")]; tensor var_7614_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1171_cast_fp16)[name = tensor("op_7614_cast_fp16")]; tensor var_7615_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1173_cast_fp16)[name = tensor("op_7615_cast_fp16")]; tensor var_7616_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1175_cast_fp16)[name = tensor("op_7616_cast_fp16")]; tensor var_7617_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1177_cast_fp16)[name = tensor("op_7617_cast_fp16")]; tensor var_7618_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1179_cast_fp16)[name = tensor("op_7618_cast_fp16")]; tensor var_7619_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1181_cast_fp16)[name = tensor("op_7619_cast_fp16")]; tensor var_7620_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1183_cast_fp16)[name = tensor("op_7620_cast_fp16")]; tensor var_7621_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1185_cast_fp16)[name = tensor("op_7621_cast_fp16")]; tensor var_7622_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1187_cast_fp16)[name = tensor("op_7622_cast_fp16")]; tensor var_7623_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1189_cast_fp16)[name = tensor("op_7623_cast_fp16")]; tensor var_7624_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1191_cast_fp16)[name = tensor("op_7624_cast_fp16")]; tensor var_7625_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1193_cast_fp16)[name = tensor("op_7625_cast_fp16")]; tensor var_7626_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1195_cast_fp16)[name = tensor("op_7626_cast_fp16")]; tensor var_7627_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1197_cast_fp16)[name = tensor("op_7627_cast_fp16")]; tensor var_7628_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1199_cast_fp16)[name = tensor("op_7628_cast_fp16")]; tensor var_7629_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1201_cast_fp16)[name = tensor("op_7629_cast_fp16")]; tensor var_7630_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1203_cast_fp16)[name = tensor("op_7630_cast_fp16")]; tensor var_7631_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1205_cast_fp16)[name = tensor("op_7631_cast_fp16")]; tensor var_7632_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1207_cast_fp16)[name = tensor("op_7632_cast_fp16")]; tensor var_7633_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1209_cast_fp16)[name = tensor("op_7633_cast_fp16")]; tensor var_7634_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1211_cast_fp16)[name = tensor("op_7634_cast_fp16")]; tensor var_7635_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1213_cast_fp16)[name = tensor("op_7635_cast_fp16")]; tensor var_7636_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1215_cast_fp16)[name = tensor("op_7636_cast_fp16")]; tensor var_7637_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1217_cast_fp16)[name = tensor("op_7637_cast_fp16")]; tensor var_7638_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1219_cast_fp16)[name = tensor("op_7638_cast_fp16")]; tensor var_7639_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1221_cast_fp16)[name = tensor("op_7639_cast_fp16")]; tensor var_7640_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1223_cast_fp16)[name = tensor("op_7640_cast_fp16")]; tensor var_7641_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1225_cast_fp16)[name = tensor("op_7641_cast_fp16")]; tensor var_7642_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1227_cast_fp16)[name = tensor("op_7642_cast_fp16")]; tensor var_7643_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1229_cast_fp16)[name = tensor("op_7643_cast_fp16")]; tensor var_7644_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1231_cast_fp16)[name = tensor("op_7644_cast_fp16")]; tensor var_7645_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1233_cast_fp16)[name = tensor("op_7645_cast_fp16")]; tensor var_7646_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1235_cast_fp16)[name = tensor("op_7646_cast_fp16")]; tensor var_7647_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1237_cast_fp16)[name = tensor("op_7647_cast_fp16")]; tensor var_7648_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1239_cast_fp16)[name = tensor("op_7648_cast_fp16")]; tensor var_7649_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1241_cast_fp16)[name = tensor("op_7649_cast_fp16")]; tensor var_7650_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1243_cast_fp16)[name = tensor("op_7650_cast_fp16")]; tensor var_7651_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1245_cast_fp16)[name = tensor("op_7651_cast_fp16")]; tensor var_7652_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1247_cast_fp16)[name = tensor("op_7652_cast_fp16")]; tensor var_7653_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1249_cast_fp16)[name = tensor("op_7653_cast_fp16")]; tensor var_7654_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1251_cast_fp16)[name = tensor("op_7654_cast_fp16")]; tensor var_7655_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1253_cast_fp16)[name = tensor("op_7655_cast_fp16")]; tensor var_7656_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1255_cast_fp16)[name = tensor("op_7656_cast_fp16")]; tensor var_7657_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1257_cast_fp16)[name = tensor("op_7657_cast_fp16")]; tensor var_7658_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1259_cast_fp16)[name = tensor("op_7658_cast_fp16")]; tensor var_7659_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1261_cast_fp16)[name = tensor("op_7659_cast_fp16")]; tensor var_7660_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1263_cast_fp16)[name = tensor("op_7660_cast_fp16")]; tensor var_7661_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1265_cast_fp16)[name = tensor("op_7661_cast_fp16")]; tensor var_7662_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1267_cast_fp16)[name = tensor("op_7662_cast_fp16")]; tensor var_7663_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1269_cast_fp16)[name = tensor("op_7663_cast_fp16")]; tensor var_7664_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1271_cast_fp16)[name = tensor("op_7664_cast_fp16")]; tensor var_7665_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1273_cast_fp16)[name = tensor("op_7665_cast_fp16")]; tensor var_7666_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1275_cast_fp16)[name = tensor("op_7666_cast_fp16")]; tensor var_7667_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1277_cast_fp16)[name = tensor("op_7667_cast_fp16")]; tensor var_7668_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1279_cast_fp16)[name = tensor("op_7668_cast_fp16")]; tensor var_7669_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1281_cast_fp16)[name = tensor("op_7669_cast_fp16")]; tensor var_7670_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1283_cast_fp16)[name = tensor("op_7670_cast_fp16")]; tensor var_7671_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1285_cast_fp16)[name = tensor("op_7671_cast_fp16")]; tensor var_7672_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1287_cast_fp16)[name = tensor("op_7672_cast_fp16")]; tensor var_7673_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1289_cast_fp16)[name = tensor("op_7673_cast_fp16")]; tensor var_7674_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1291_cast_fp16)[name = tensor("op_7674_cast_fp16")]; tensor var_7675_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1293_cast_fp16)[name = tensor("op_7675_cast_fp16")]; tensor var_7676_cast_fp16 = softmax(axis = var_7049, x = aw_chunk_1295_cast_fp16)[name = tensor("op_7676_cast_fp16")]; tensor var_7678_equation_0 = const()[name = tensor("op_7678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7678_cast_fp16 = einsum(equation = var_7678_equation_0, values = (var_7270_cast_fp16, var_7605_cast_fp16))[name = tensor("op_7678_cast_fp16")]; tensor var_7680_equation_0 = const()[name = tensor("op_7680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7680_cast_fp16 = einsum(equation = var_7680_equation_0, values = (var_7270_cast_fp16, var_7606_cast_fp16))[name = tensor("op_7680_cast_fp16")]; tensor var_7682_equation_0 = const()[name = tensor("op_7682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7682_cast_fp16 = einsum(equation = var_7682_equation_0, values = (var_7270_cast_fp16, var_7607_cast_fp16))[name = tensor("op_7682_cast_fp16")]; tensor var_7684_equation_0 = const()[name = tensor("op_7684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7684_cast_fp16 = einsum(equation = var_7684_equation_0, values = (var_7270_cast_fp16, var_7608_cast_fp16))[name = tensor("op_7684_cast_fp16")]; tensor var_7686_equation_0 = const()[name = tensor("op_7686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7686_cast_fp16 = einsum(equation = var_7686_equation_0, values = (var_7270_cast_fp16, var_7609_cast_fp16))[name = tensor("op_7686_cast_fp16")]; tensor var_7688_equation_0 = const()[name = tensor("op_7688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7688_cast_fp16 = einsum(equation = var_7688_equation_0, values = (var_7270_cast_fp16, var_7610_cast_fp16))[name = tensor("op_7688_cast_fp16")]; tensor var_7690_equation_0 = const()[name = tensor("op_7690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7690_cast_fp16 = einsum(equation = var_7690_equation_0, values = (var_7274_cast_fp16, var_7611_cast_fp16))[name = tensor("op_7690_cast_fp16")]; tensor var_7692_equation_0 = const()[name = tensor("op_7692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7692_cast_fp16 = einsum(equation = var_7692_equation_0, values = (var_7274_cast_fp16, var_7612_cast_fp16))[name = tensor("op_7692_cast_fp16")]; tensor var_7694_equation_0 = const()[name = tensor("op_7694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7694_cast_fp16 = einsum(equation = var_7694_equation_0, values = (var_7274_cast_fp16, var_7613_cast_fp16))[name = tensor("op_7694_cast_fp16")]; tensor var_7696_equation_0 = const()[name = tensor("op_7696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7696_cast_fp16 = einsum(equation = var_7696_equation_0, values = (var_7274_cast_fp16, var_7614_cast_fp16))[name = tensor("op_7696_cast_fp16")]; tensor var_7698_equation_0 = const()[name = tensor("op_7698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7698_cast_fp16 = einsum(equation = var_7698_equation_0, values = (var_7274_cast_fp16, var_7615_cast_fp16))[name = tensor("op_7698_cast_fp16")]; tensor var_7700_equation_0 = const()[name = tensor("op_7700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7700_cast_fp16 = einsum(equation = var_7700_equation_0, values = (var_7274_cast_fp16, var_7616_cast_fp16))[name = tensor("op_7700_cast_fp16")]; tensor var_7702_equation_0 = const()[name = tensor("op_7702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7702_cast_fp16 = einsum(equation = var_7702_equation_0, values = (var_7278_cast_fp16, var_7617_cast_fp16))[name = tensor("op_7702_cast_fp16")]; tensor var_7704_equation_0 = const()[name = tensor("op_7704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7704_cast_fp16 = einsum(equation = var_7704_equation_0, values = (var_7278_cast_fp16, var_7618_cast_fp16))[name = tensor("op_7704_cast_fp16")]; tensor var_7706_equation_0 = const()[name = tensor("op_7706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7706_cast_fp16 = einsum(equation = var_7706_equation_0, values = (var_7278_cast_fp16, var_7619_cast_fp16))[name = tensor("op_7706_cast_fp16")]; tensor var_7708_equation_0 = const()[name = tensor("op_7708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7708_cast_fp16 = einsum(equation = var_7708_equation_0, values = (var_7278_cast_fp16, var_7620_cast_fp16))[name = tensor("op_7708_cast_fp16")]; tensor var_7710_equation_0 = const()[name = tensor("op_7710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7710_cast_fp16 = einsum(equation = var_7710_equation_0, values = (var_7278_cast_fp16, var_7621_cast_fp16))[name = tensor("op_7710_cast_fp16")]; tensor var_7712_equation_0 = const()[name = tensor("op_7712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7712_cast_fp16 = einsum(equation = var_7712_equation_0, values = (var_7278_cast_fp16, var_7622_cast_fp16))[name = tensor("op_7712_cast_fp16")]; tensor var_7714_equation_0 = const()[name = tensor("op_7714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7714_cast_fp16 = einsum(equation = var_7714_equation_0, values = (var_7282_cast_fp16, var_7623_cast_fp16))[name = tensor("op_7714_cast_fp16")]; tensor var_7716_equation_0 = const()[name = tensor("op_7716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7716_cast_fp16 = einsum(equation = var_7716_equation_0, values = (var_7282_cast_fp16, var_7624_cast_fp16))[name = tensor("op_7716_cast_fp16")]; tensor var_7718_equation_0 = const()[name = tensor("op_7718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7718_cast_fp16 = einsum(equation = var_7718_equation_0, values = (var_7282_cast_fp16, var_7625_cast_fp16))[name = tensor("op_7718_cast_fp16")]; tensor var_7720_equation_0 = const()[name = tensor("op_7720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7720_cast_fp16 = einsum(equation = var_7720_equation_0, values = (var_7282_cast_fp16, var_7626_cast_fp16))[name = tensor("op_7720_cast_fp16")]; tensor var_7722_equation_0 = const()[name = tensor("op_7722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7722_cast_fp16 = einsum(equation = var_7722_equation_0, values = (var_7282_cast_fp16, var_7627_cast_fp16))[name = tensor("op_7722_cast_fp16")]; tensor var_7724_equation_0 = const()[name = tensor("op_7724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7724_cast_fp16 = einsum(equation = var_7724_equation_0, values = (var_7282_cast_fp16, var_7628_cast_fp16))[name = tensor("op_7724_cast_fp16")]; tensor var_7726_equation_0 = const()[name = tensor("op_7726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7726_cast_fp16 = einsum(equation = var_7726_equation_0, values = (var_7286_cast_fp16, var_7629_cast_fp16))[name = tensor("op_7726_cast_fp16")]; tensor var_7728_equation_0 = const()[name = tensor("op_7728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7728_cast_fp16 = einsum(equation = var_7728_equation_0, values = (var_7286_cast_fp16, var_7630_cast_fp16))[name = tensor("op_7728_cast_fp16")]; tensor var_7730_equation_0 = const()[name = tensor("op_7730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7730_cast_fp16 = einsum(equation = var_7730_equation_0, values = (var_7286_cast_fp16, var_7631_cast_fp16))[name = tensor("op_7730_cast_fp16")]; tensor var_7732_equation_0 = const()[name = tensor("op_7732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7732_cast_fp16 = einsum(equation = var_7732_equation_0, values = (var_7286_cast_fp16, var_7632_cast_fp16))[name = tensor("op_7732_cast_fp16")]; tensor var_7734_equation_0 = const()[name = tensor("op_7734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7734_cast_fp16 = einsum(equation = var_7734_equation_0, values = (var_7286_cast_fp16, var_7633_cast_fp16))[name = tensor("op_7734_cast_fp16")]; tensor var_7736_equation_0 = const()[name = tensor("op_7736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7736_cast_fp16 = einsum(equation = var_7736_equation_0, values = (var_7286_cast_fp16, var_7634_cast_fp16))[name = tensor("op_7736_cast_fp16")]; tensor var_7738_equation_0 = const()[name = tensor("op_7738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7738_cast_fp16 = einsum(equation = var_7738_equation_0, values = (var_7290_cast_fp16, var_7635_cast_fp16))[name = tensor("op_7738_cast_fp16")]; tensor var_7740_equation_0 = const()[name = tensor("op_7740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7740_cast_fp16 = einsum(equation = var_7740_equation_0, values = (var_7290_cast_fp16, var_7636_cast_fp16))[name = tensor("op_7740_cast_fp16")]; tensor var_7742_equation_0 = const()[name = tensor("op_7742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7742_cast_fp16 = einsum(equation = var_7742_equation_0, values = (var_7290_cast_fp16, var_7637_cast_fp16))[name = tensor("op_7742_cast_fp16")]; tensor var_7744_equation_0 = const()[name = tensor("op_7744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7744_cast_fp16 = einsum(equation = var_7744_equation_0, values = (var_7290_cast_fp16, var_7638_cast_fp16))[name = tensor("op_7744_cast_fp16")]; tensor var_7746_equation_0 = const()[name = tensor("op_7746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7746_cast_fp16 = einsum(equation = var_7746_equation_0, values = (var_7290_cast_fp16, var_7639_cast_fp16))[name = tensor("op_7746_cast_fp16")]; tensor var_7748_equation_0 = const()[name = tensor("op_7748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7748_cast_fp16 = einsum(equation = var_7748_equation_0, values = (var_7290_cast_fp16, var_7640_cast_fp16))[name = tensor("op_7748_cast_fp16")]; tensor var_7750_equation_0 = const()[name = tensor("op_7750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7750_cast_fp16 = einsum(equation = var_7750_equation_0, values = (var_7294_cast_fp16, var_7641_cast_fp16))[name = tensor("op_7750_cast_fp16")]; tensor var_7752_equation_0 = const()[name = tensor("op_7752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7752_cast_fp16 = einsum(equation = var_7752_equation_0, values = (var_7294_cast_fp16, var_7642_cast_fp16))[name = tensor("op_7752_cast_fp16")]; tensor var_7754_equation_0 = const()[name = tensor("op_7754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7754_cast_fp16 = einsum(equation = var_7754_equation_0, values = (var_7294_cast_fp16, var_7643_cast_fp16))[name = tensor("op_7754_cast_fp16")]; tensor var_7756_equation_0 = const()[name = tensor("op_7756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7756_cast_fp16 = einsum(equation = var_7756_equation_0, values = (var_7294_cast_fp16, var_7644_cast_fp16))[name = tensor("op_7756_cast_fp16")]; tensor var_7758_equation_0 = const()[name = tensor("op_7758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7758_cast_fp16 = einsum(equation = var_7758_equation_0, values = (var_7294_cast_fp16, var_7645_cast_fp16))[name = tensor("op_7758_cast_fp16")]; tensor var_7760_equation_0 = const()[name = tensor("op_7760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7760_cast_fp16 = einsum(equation = var_7760_equation_0, values = (var_7294_cast_fp16, var_7646_cast_fp16))[name = tensor("op_7760_cast_fp16")]; tensor var_7762_equation_0 = const()[name = tensor("op_7762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7762_cast_fp16 = einsum(equation = var_7762_equation_0, values = (var_7298_cast_fp16, var_7647_cast_fp16))[name = tensor("op_7762_cast_fp16")]; tensor var_7764_equation_0 = const()[name = tensor("op_7764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7764_cast_fp16 = einsum(equation = var_7764_equation_0, values = (var_7298_cast_fp16, var_7648_cast_fp16))[name = tensor("op_7764_cast_fp16")]; tensor var_7766_equation_0 = const()[name = tensor("op_7766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7766_cast_fp16 = einsum(equation = var_7766_equation_0, values = (var_7298_cast_fp16, var_7649_cast_fp16))[name = tensor("op_7766_cast_fp16")]; tensor var_7768_equation_0 = const()[name = tensor("op_7768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7768_cast_fp16 = einsum(equation = var_7768_equation_0, values = (var_7298_cast_fp16, var_7650_cast_fp16))[name = tensor("op_7768_cast_fp16")]; tensor var_7770_equation_0 = const()[name = tensor("op_7770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7770_cast_fp16 = einsum(equation = var_7770_equation_0, values = (var_7298_cast_fp16, var_7651_cast_fp16))[name = tensor("op_7770_cast_fp16")]; tensor var_7772_equation_0 = const()[name = tensor("op_7772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7772_cast_fp16 = einsum(equation = var_7772_equation_0, values = (var_7298_cast_fp16, var_7652_cast_fp16))[name = tensor("op_7772_cast_fp16")]; tensor var_7774_equation_0 = const()[name = tensor("op_7774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7774_cast_fp16 = einsum(equation = var_7774_equation_0, values = (var_7302_cast_fp16, var_7653_cast_fp16))[name = tensor("op_7774_cast_fp16")]; tensor var_7776_equation_0 = const()[name = tensor("op_7776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7776_cast_fp16 = einsum(equation = var_7776_equation_0, values = (var_7302_cast_fp16, var_7654_cast_fp16))[name = tensor("op_7776_cast_fp16")]; tensor var_7778_equation_0 = const()[name = tensor("op_7778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7778_cast_fp16 = einsum(equation = var_7778_equation_0, values = (var_7302_cast_fp16, var_7655_cast_fp16))[name = tensor("op_7778_cast_fp16")]; tensor var_7780_equation_0 = const()[name = tensor("op_7780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7780_cast_fp16 = einsum(equation = var_7780_equation_0, values = (var_7302_cast_fp16, var_7656_cast_fp16))[name = tensor("op_7780_cast_fp16")]; tensor var_7782_equation_0 = const()[name = tensor("op_7782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7782_cast_fp16 = einsum(equation = var_7782_equation_0, values = (var_7302_cast_fp16, var_7657_cast_fp16))[name = tensor("op_7782_cast_fp16")]; tensor var_7784_equation_0 = const()[name = tensor("op_7784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7784_cast_fp16 = einsum(equation = var_7784_equation_0, values = (var_7302_cast_fp16, var_7658_cast_fp16))[name = tensor("op_7784_cast_fp16")]; tensor var_7786_equation_0 = const()[name = tensor("op_7786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7786_cast_fp16 = einsum(equation = var_7786_equation_0, values = (var_7306_cast_fp16, var_7659_cast_fp16))[name = tensor("op_7786_cast_fp16")]; tensor var_7788_equation_0 = const()[name = tensor("op_7788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7788_cast_fp16 = einsum(equation = var_7788_equation_0, values = (var_7306_cast_fp16, var_7660_cast_fp16))[name = tensor("op_7788_cast_fp16")]; tensor var_7790_equation_0 = const()[name = tensor("op_7790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7790_cast_fp16 = einsum(equation = var_7790_equation_0, values = (var_7306_cast_fp16, var_7661_cast_fp16))[name = tensor("op_7790_cast_fp16")]; tensor var_7792_equation_0 = const()[name = tensor("op_7792_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7792_cast_fp16 = einsum(equation = var_7792_equation_0, values = (var_7306_cast_fp16, var_7662_cast_fp16))[name = tensor("op_7792_cast_fp16")]; tensor var_7794_equation_0 = const()[name = tensor("op_7794_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7794_cast_fp16 = einsum(equation = var_7794_equation_0, values = (var_7306_cast_fp16, var_7663_cast_fp16))[name = tensor("op_7794_cast_fp16")]; tensor var_7796_equation_0 = const()[name = tensor("op_7796_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7796_cast_fp16 = einsum(equation = var_7796_equation_0, values = (var_7306_cast_fp16, var_7664_cast_fp16))[name = tensor("op_7796_cast_fp16")]; tensor var_7798_equation_0 = const()[name = tensor("op_7798_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7798_cast_fp16 = einsum(equation = var_7798_equation_0, values = (var_7310_cast_fp16, var_7665_cast_fp16))[name = tensor("op_7798_cast_fp16")]; tensor var_7800_equation_0 = const()[name = tensor("op_7800_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7800_cast_fp16 = einsum(equation = var_7800_equation_0, values = (var_7310_cast_fp16, var_7666_cast_fp16))[name = tensor("op_7800_cast_fp16")]; tensor var_7802_equation_0 = const()[name = tensor("op_7802_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7802_cast_fp16 = einsum(equation = var_7802_equation_0, values = (var_7310_cast_fp16, var_7667_cast_fp16))[name = tensor("op_7802_cast_fp16")]; tensor var_7804_equation_0 = const()[name = tensor("op_7804_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7804_cast_fp16 = einsum(equation = var_7804_equation_0, values = (var_7310_cast_fp16, var_7668_cast_fp16))[name = tensor("op_7804_cast_fp16")]; tensor var_7806_equation_0 = const()[name = tensor("op_7806_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7806_cast_fp16 = einsum(equation = var_7806_equation_0, values = (var_7310_cast_fp16, var_7669_cast_fp16))[name = tensor("op_7806_cast_fp16")]; tensor var_7808_equation_0 = const()[name = tensor("op_7808_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7808_cast_fp16 = einsum(equation = var_7808_equation_0, values = (var_7310_cast_fp16, var_7670_cast_fp16))[name = tensor("op_7808_cast_fp16")]; tensor var_7810_equation_0 = const()[name = tensor("op_7810_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7810_cast_fp16 = einsum(equation = var_7810_equation_0, values = (var_7314_cast_fp16, var_7671_cast_fp16))[name = tensor("op_7810_cast_fp16")]; tensor var_7812_equation_0 = const()[name = tensor("op_7812_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7812_cast_fp16 = einsum(equation = var_7812_equation_0, values = (var_7314_cast_fp16, var_7672_cast_fp16))[name = tensor("op_7812_cast_fp16")]; tensor var_7814_equation_0 = const()[name = tensor("op_7814_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7814_cast_fp16 = einsum(equation = var_7814_equation_0, values = (var_7314_cast_fp16, var_7673_cast_fp16))[name = tensor("op_7814_cast_fp16")]; tensor var_7816_equation_0 = const()[name = tensor("op_7816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7816_cast_fp16 = einsum(equation = var_7816_equation_0, values = (var_7314_cast_fp16, var_7674_cast_fp16))[name = tensor("op_7816_cast_fp16")]; tensor var_7818_equation_0 = const()[name = tensor("op_7818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7818_cast_fp16 = einsum(equation = var_7818_equation_0, values = (var_7314_cast_fp16, var_7675_cast_fp16))[name = tensor("op_7818_cast_fp16")]; tensor var_7820_equation_0 = const()[name = tensor("op_7820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7820_cast_fp16 = einsum(equation = var_7820_equation_0, values = (var_7314_cast_fp16, var_7676_cast_fp16))[name = tensor("op_7820_cast_fp16")]; tensor var_7822_interleave_0 = const()[name = tensor("op_7822_interleave_0"), val = tensor(false)]; tensor var_7822_cast_fp16 = concat(axis = var_7033, interleave = var_7822_interleave_0, values = (var_7678_cast_fp16, var_7680_cast_fp16, var_7682_cast_fp16, var_7684_cast_fp16, var_7686_cast_fp16, var_7688_cast_fp16))[name = tensor("op_7822_cast_fp16")]; tensor var_7824_interleave_0 = const()[name = tensor("op_7824_interleave_0"), val = tensor(false)]; tensor var_7824_cast_fp16 = concat(axis = var_7033, interleave = var_7824_interleave_0, values = (var_7690_cast_fp16, var_7692_cast_fp16, var_7694_cast_fp16, var_7696_cast_fp16, var_7698_cast_fp16, var_7700_cast_fp16))[name = tensor("op_7824_cast_fp16")]; tensor var_7826_interleave_0 = const()[name = tensor("op_7826_interleave_0"), val = tensor(false)]; tensor var_7826_cast_fp16 = concat(axis = var_7033, interleave = var_7826_interleave_0, values = (var_7702_cast_fp16, var_7704_cast_fp16, var_7706_cast_fp16, var_7708_cast_fp16, var_7710_cast_fp16, var_7712_cast_fp16))[name = tensor("op_7826_cast_fp16")]; tensor var_7828_interleave_0 = const()[name = tensor("op_7828_interleave_0"), val = tensor(false)]; tensor var_7828_cast_fp16 = concat(axis = var_7033, interleave = var_7828_interleave_0, values = (var_7714_cast_fp16, var_7716_cast_fp16, var_7718_cast_fp16, var_7720_cast_fp16, var_7722_cast_fp16, var_7724_cast_fp16))[name = tensor("op_7828_cast_fp16")]; tensor var_7830_interleave_0 = const()[name = tensor("op_7830_interleave_0"), val = tensor(false)]; tensor var_7830_cast_fp16 = concat(axis = var_7033, interleave = var_7830_interleave_0, values = (var_7726_cast_fp16, var_7728_cast_fp16, var_7730_cast_fp16, var_7732_cast_fp16, var_7734_cast_fp16, var_7736_cast_fp16))[name = tensor("op_7830_cast_fp16")]; tensor var_7832_interleave_0 = const()[name = tensor("op_7832_interleave_0"), val = tensor(false)]; tensor var_7832_cast_fp16 = concat(axis = var_7033, interleave = var_7832_interleave_0, values = (var_7738_cast_fp16, var_7740_cast_fp16, var_7742_cast_fp16, var_7744_cast_fp16, var_7746_cast_fp16, var_7748_cast_fp16))[name = tensor("op_7832_cast_fp16")]; tensor var_7834_interleave_0 = const()[name = tensor("op_7834_interleave_0"), val = tensor(false)]; tensor var_7834_cast_fp16 = concat(axis = var_7033, interleave = var_7834_interleave_0, values = (var_7750_cast_fp16, var_7752_cast_fp16, var_7754_cast_fp16, var_7756_cast_fp16, var_7758_cast_fp16, var_7760_cast_fp16))[name = tensor("op_7834_cast_fp16")]; tensor var_7836_interleave_0 = const()[name = tensor("op_7836_interleave_0"), val = tensor(false)]; tensor var_7836_cast_fp16 = concat(axis = var_7033, interleave = var_7836_interleave_0, values = (var_7762_cast_fp16, var_7764_cast_fp16, var_7766_cast_fp16, var_7768_cast_fp16, var_7770_cast_fp16, var_7772_cast_fp16))[name = tensor("op_7836_cast_fp16")]; tensor var_7838_interleave_0 = const()[name = tensor("op_7838_interleave_0"), val = tensor(false)]; tensor var_7838_cast_fp16 = concat(axis = var_7033, interleave = var_7838_interleave_0, values = (var_7774_cast_fp16, var_7776_cast_fp16, var_7778_cast_fp16, var_7780_cast_fp16, var_7782_cast_fp16, var_7784_cast_fp16))[name = tensor("op_7838_cast_fp16")]; tensor var_7840_interleave_0 = const()[name = tensor("op_7840_interleave_0"), val = tensor(false)]; tensor var_7840_cast_fp16 = concat(axis = var_7033, interleave = var_7840_interleave_0, values = (var_7786_cast_fp16, var_7788_cast_fp16, var_7790_cast_fp16, var_7792_cast_fp16, var_7794_cast_fp16, var_7796_cast_fp16))[name = tensor("op_7840_cast_fp16")]; tensor var_7842_interleave_0 = const()[name = tensor("op_7842_interleave_0"), val = tensor(false)]; tensor var_7842_cast_fp16 = concat(axis = var_7033, interleave = var_7842_interleave_0, values = (var_7798_cast_fp16, var_7800_cast_fp16, var_7802_cast_fp16, var_7804_cast_fp16, var_7806_cast_fp16, var_7808_cast_fp16))[name = tensor("op_7842_cast_fp16")]; tensor var_7844_interleave_0 = const()[name = tensor("op_7844_interleave_0"), val = tensor(false)]; tensor var_7844_cast_fp16 = concat(axis = var_7033, interleave = var_7844_interleave_0, values = (var_7810_cast_fp16, var_7812_cast_fp16, var_7814_cast_fp16, var_7816_cast_fp16, var_7818_cast_fp16, var_7820_cast_fp16))[name = tensor("op_7844_cast_fp16")]; tensor input_65_interleave_0 = const()[name = tensor("input_65_interleave_0"), val = tensor(false)]; tensor input_65_cast_fp16 = concat(axis = var_7049, interleave = input_65_interleave_0, values = (var_7822_cast_fp16, var_7824_cast_fp16, var_7826_cast_fp16, var_7828_cast_fp16, var_7830_cast_fp16, var_7832_cast_fp16, var_7834_cast_fp16, var_7836_cast_fp16, var_7838_cast_fp16, var_7840_cast_fp16, var_7842_cast_fp16, var_7844_cast_fp16))[name = tensor("input_65_cast_fp16")]; tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("valid")]; tensor obj_35_strides_0 = const()[name = tensor("obj_35_strides_0"), val = tensor([1, 1])]; tensor obj_35_pad_0 = const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_35_dilations_0 = const()[name = tensor("obj_35_dilations_0"), val = tensor([1, 1])]; tensor obj_35_groups_0 = const()[name = tensor("obj_35_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123165120)))]; tensor layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124344832)))]; tensor obj_35_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = obj_35_dilations_0, groups = obj_35_groups_0, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = obj_35_strides_0, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("obj_35_cast_fp16")]; tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; tensor var_7863_to_fp16 = const()[name = tensor("op_7863_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_7863_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124346432)))]; tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124348032)))]; tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; tensor input_69_pad_type_0 = const()[name = tensor("input_69_pad_type_0"), val = tensor("valid")]; tensor input_69_strides_0 = const()[name = tensor("input_69_strides_0"), val = tensor([1, 1])]; tensor input_69_pad_0 = const()[name = tensor("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_69_dilations_0 = const()[name = tensor("input_69_dilations_0"), val = tensor([1, 1])]; tensor input_69_groups_0 = const()[name = tensor("input_69_groups_0"), val = tensor(1)]; tensor layers_8_fc1_weight_to_fp16 = const()[name = tensor("layers_8_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124349632)))]; tensor layers_8_fc1_bias_to_fp16 = const()[name = tensor("layers_8_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129068288)))]; tensor input_69_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = layers_8_fc1_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor hidden_states_21_pad_type_0 = const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("valid")]; tensor hidden_states_21_strides_0 = const()[name = tensor("hidden_states_21_strides_0"), val = tensor([1, 1])]; tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_21_dilations_0 = const()[name = tensor("hidden_states_21_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_21_groups_0 = const()[name = tensor("hidden_states_21_groups_0"), val = tensor(1)]; tensor layers_8_fc2_weight_to_fp16 = const()[name = tensor("layers_8_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129074496)))]; tensor layers_8_fc2_bias_to_fp16 = const()[name = tensor("layers_8_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133793152)))]; tensor hidden_states_21_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = hidden_states_21_dilations_0, groups = hidden_states_21_groups_0, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = hidden_states_21_strides_0, weight = layers_8_fc2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; tensor var_7895 = const()[name = tensor("op_7895"), val = tensor(3)]; tensor var_7911 = const()[name = tensor("op_7911"), val = tensor(1)]; tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; tensor var_7928_to_fp16 = const()[name = tensor("op_7928_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_7928_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133794752)))]; tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133796352)))]; tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("valid")]; tensor query_19_strides_0 = const()[name = tensor("query_19_strides_0"), val = tensor([1, 1])]; tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_19_dilations_0 = const()[name = tensor("query_19_dilations_0"), val = tensor([1, 1])]; tensor query_19_groups_0 = const()[name = tensor("query_19_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133797952)))]; tensor layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134977664)))]; tensor query_19_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = query_19_dilations_0, groups = query_19_groups_0, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = query_19_strides_0, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_19_cast_fp16")]; tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("valid")]; tensor key_19_strides_0 = const()[name = tensor("key_19_strides_0"), val = tensor([1, 1])]; tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_19_dilations_0 = const()[name = tensor("key_19_dilations_0"), val = tensor([1, 1])]; tensor key_19_groups_0 = const()[name = tensor("key_19_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134979264)))]; tensor key_19_cast_fp16 = conv(dilations = key_19_dilations_0, groups = key_19_groups_0, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = key_19_strides_0, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("key_19_cast_fp16")]; tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("valid")]; tensor value_19_strides_0 = const()[name = tensor("value_19_strides_0"), val = tensor([1, 1])]; tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_19_dilations_0 = const()[name = tensor("value_19_dilations_0"), val = tensor([1, 1])]; tensor value_19_groups_0 = const()[name = tensor("value_19_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136158976)))]; tensor layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137338688)))]; tensor value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = value_19_dilations_0, groups = value_19_groups_0, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = value_19_strides_0, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("value_19_cast_fp16")]; tensor var_7963_begin_0 = const()[name = tensor("op_7963_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7963_end_0 = const()[name = tensor("op_7963_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7963_end_mask_0 = const()[name = tensor("op_7963_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7963_cast_fp16 = slice_by_index(begin = var_7963_begin_0, end = var_7963_end_0, end_mask = var_7963_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7963_cast_fp16")]; tensor var_7967_begin_0 = const()[name = tensor("op_7967_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_7967_end_0 = const()[name = tensor("op_7967_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_7967_end_mask_0 = const()[name = tensor("op_7967_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7967_cast_fp16 = slice_by_index(begin = var_7967_begin_0, end = var_7967_end_0, end_mask = var_7967_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7967_cast_fp16")]; tensor var_7971_begin_0 = const()[name = tensor("op_7971_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_7971_end_0 = const()[name = tensor("op_7971_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_7971_end_mask_0 = const()[name = tensor("op_7971_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7971_cast_fp16 = slice_by_index(begin = var_7971_begin_0, end = var_7971_end_0, end_mask = var_7971_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7971_cast_fp16")]; tensor var_7975_begin_0 = const()[name = tensor("op_7975_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_7975_end_0 = const()[name = tensor("op_7975_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_7975_end_mask_0 = const()[name = tensor("op_7975_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7975_cast_fp16 = slice_by_index(begin = var_7975_begin_0, end = var_7975_end_0, end_mask = var_7975_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7975_cast_fp16")]; tensor var_7979_begin_0 = const()[name = tensor("op_7979_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_7979_end_0 = const()[name = tensor("op_7979_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_7979_end_mask_0 = const()[name = tensor("op_7979_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7979_cast_fp16 = slice_by_index(begin = var_7979_begin_0, end = var_7979_end_0, end_mask = var_7979_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7979_cast_fp16")]; tensor var_7983_begin_0 = const()[name = tensor("op_7983_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7983_end_0 = const()[name = tensor("op_7983_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_7983_end_mask_0 = const()[name = tensor("op_7983_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7983_cast_fp16 = slice_by_index(begin = var_7983_begin_0, end = var_7983_end_0, end_mask = var_7983_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7983_cast_fp16")]; tensor var_7987_begin_0 = const()[name = tensor("op_7987_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_7987_end_0 = const()[name = tensor("op_7987_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_7987_end_mask_0 = const()[name = tensor("op_7987_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7987_cast_fp16 = slice_by_index(begin = var_7987_begin_0, end = var_7987_end_0, end_mask = var_7987_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7987_cast_fp16")]; tensor var_7991_begin_0 = const()[name = tensor("op_7991_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_7991_end_0 = const()[name = tensor("op_7991_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_7991_end_mask_0 = const()[name = tensor("op_7991_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7991_cast_fp16 = slice_by_index(begin = var_7991_begin_0, end = var_7991_end_0, end_mask = var_7991_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7991_cast_fp16")]; tensor var_7995_begin_0 = const()[name = tensor("op_7995_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_7995_end_0 = const()[name = tensor("op_7995_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_7995_end_mask_0 = const()[name = tensor("op_7995_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7995_cast_fp16 = slice_by_index(begin = var_7995_begin_0, end = var_7995_end_0, end_mask = var_7995_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7995_cast_fp16")]; tensor var_7999_begin_0 = const()[name = tensor("op_7999_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_7999_end_0 = const()[name = tensor("op_7999_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_7999_end_mask_0 = const()[name = tensor("op_7999_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7999_cast_fp16 = slice_by_index(begin = var_7999_begin_0, end = var_7999_end_0, end_mask = var_7999_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_7999_cast_fp16")]; tensor var_8003_begin_0 = const()[name = tensor("op_8003_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_8003_end_0 = const()[name = tensor("op_8003_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_8003_end_mask_0 = const()[name = tensor("op_8003_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8003_cast_fp16 = slice_by_index(begin = var_8003_begin_0, end = var_8003_end_0, end_mask = var_8003_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8003_cast_fp16")]; tensor var_8007_begin_0 = const()[name = tensor("op_8007_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_8007_end_0 = const()[name = tensor("op_8007_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_8007_end_mask_0 = const()[name = tensor("op_8007_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8007_cast_fp16 = slice_by_index(begin = var_8007_begin_0, end = var_8007_end_0, end_mask = var_8007_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_8007_cast_fp16")]; tensor var_8010_begin_0 = const()[name = tensor("op_8010_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8010_end_0 = const()[name = tensor("op_8010_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8010_end_mask_0 = const()[name = tensor("op_8010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8010_cast_fp16 = slice_by_index(begin = var_8010_begin_0, end = var_8010_end_0, end_mask = var_8010_end_mask_0, x = var_7963_cast_fp16)[name = tensor("op_8010_cast_fp16")]; tensor var_8011_begin_0 = const()[name = tensor("op_8011_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8011_end_0 = const()[name = tensor("op_8011_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8011_end_mask_0 = const()[name = tensor("op_8011_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8011_cast_fp16 = slice_by_index(begin = var_8011_begin_0, end = var_8011_end_0, end_mask = var_8011_end_mask_0, x = var_7963_cast_fp16)[name = tensor("op_8011_cast_fp16")]; tensor var_8012_begin_0 = const()[name = tensor("op_8012_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8012_end_0 = const()[name = tensor("op_8012_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8012_end_mask_0 = const()[name = tensor("op_8012_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8012_cast_fp16 = slice_by_index(begin = var_8012_begin_0, end = var_8012_end_0, end_mask = var_8012_end_mask_0, x = var_7963_cast_fp16)[name = tensor("op_8012_cast_fp16")]; tensor var_8013_begin_0 = const()[name = tensor("op_8013_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8013_end_0 = const()[name = tensor("op_8013_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8013_end_mask_0 = const()[name = tensor("op_8013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8013_cast_fp16 = slice_by_index(begin = var_8013_begin_0, end = var_8013_end_0, end_mask = var_8013_end_mask_0, x = var_7963_cast_fp16)[name = tensor("op_8013_cast_fp16")]; tensor var_8014_begin_0 = const()[name = tensor("op_8014_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8014_end_0 = const()[name = tensor("op_8014_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8014_end_mask_0 = const()[name = tensor("op_8014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8014_cast_fp16 = slice_by_index(begin = var_8014_begin_0, end = var_8014_end_0, end_mask = var_8014_end_mask_0, x = var_7963_cast_fp16)[name = tensor("op_8014_cast_fp16")]; tensor var_8015_begin_0 = const()[name = tensor("op_8015_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8015_end_0 = const()[name = tensor("op_8015_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8015_end_mask_0 = const()[name = tensor("op_8015_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8015_cast_fp16 = slice_by_index(begin = var_8015_begin_0, end = var_8015_end_0, end_mask = var_8015_end_mask_0, x = var_7963_cast_fp16)[name = tensor("op_8015_cast_fp16")]; tensor var_8016_begin_0 = const()[name = tensor("op_8016_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8016_end_0 = const()[name = tensor("op_8016_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8016_end_mask_0 = const()[name = tensor("op_8016_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8016_cast_fp16 = slice_by_index(begin = var_8016_begin_0, end = var_8016_end_0, end_mask = var_8016_end_mask_0, x = var_7967_cast_fp16)[name = tensor("op_8016_cast_fp16")]; tensor var_8017_begin_0 = const()[name = tensor("op_8017_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8017_end_0 = const()[name = tensor("op_8017_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8017_end_mask_0 = const()[name = tensor("op_8017_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8017_cast_fp16 = slice_by_index(begin = var_8017_begin_0, end = var_8017_end_0, end_mask = var_8017_end_mask_0, x = var_7967_cast_fp16)[name = tensor("op_8017_cast_fp16")]; tensor var_8018_begin_0 = const()[name = tensor("op_8018_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8018_end_0 = const()[name = tensor("op_8018_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8018_end_mask_0 = const()[name = tensor("op_8018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8018_cast_fp16 = slice_by_index(begin = var_8018_begin_0, end = var_8018_end_0, end_mask = var_8018_end_mask_0, x = var_7967_cast_fp16)[name = tensor("op_8018_cast_fp16")]; tensor var_8019_begin_0 = const()[name = tensor("op_8019_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8019_end_0 = const()[name = tensor("op_8019_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8019_end_mask_0 = const()[name = tensor("op_8019_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8019_cast_fp16 = slice_by_index(begin = var_8019_begin_0, end = var_8019_end_0, end_mask = var_8019_end_mask_0, x = var_7967_cast_fp16)[name = tensor("op_8019_cast_fp16")]; tensor var_8020_begin_0 = const()[name = tensor("op_8020_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8020_end_0 = const()[name = tensor("op_8020_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8020_end_mask_0 = const()[name = tensor("op_8020_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8020_cast_fp16 = slice_by_index(begin = var_8020_begin_0, end = var_8020_end_0, end_mask = var_8020_end_mask_0, x = var_7967_cast_fp16)[name = tensor("op_8020_cast_fp16")]; tensor var_8021_begin_0 = const()[name = tensor("op_8021_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8021_end_0 = const()[name = tensor("op_8021_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8021_end_mask_0 = const()[name = tensor("op_8021_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8021_cast_fp16 = slice_by_index(begin = var_8021_begin_0, end = var_8021_end_0, end_mask = var_8021_end_mask_0, x = var_7967_cast_fp16)[name = tensor("op_8021_cast_fp16")]; tensor var_8022_begin_0 = const()[name = tensor("op_8022_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8022_end_0 = const()[name = tensor("op_8022_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8022_end_mask_0 = const()[name = tensor("op_8022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8022_cast_fp16 = slice_by_index(begin = var_8022_begin_0, end = var_8022_end_0, end_mask = var_8022_end_mask_0, x = var_7971_cast_fp16)[name = tensor("op_8022_cast_fp16")]; tensor var_8023_begin_0 = const()[name = tensor("op_8023_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8023_end_0 = const()[name = tensor("op_8023_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8023_end_mask_0 = const()[name = tensor("op_8023_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8023_cast_fp16 = slice_by_index(begin = var_8023_begin_0, end = var_8023_end_0, end_mask = var_8023_end_mask_0, x = var_7971_cast_fp16)[name = tensor("op_8023_cast_fp16")]; tensor var_8024_begin_0 = const()[name = tensor("op_8024_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8024_end_0 = const()[name = tensor("op_8024_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8024_end_mask_0 = const()[name = tensor("op_8024_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8024_cast_fp16 = slice_by_index(begin = var_8024_begin_0, end = var_8024_end_0, end_mask = var_8024_end_mask_0, x = var_7971_cast_fp16)[name = tensor("op_8024_cast_fp16")]; tensor var_8025_begin_0 = const()[name = tensor("op_8025_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8025_end_0 = const()[name = tensor("op_8025_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8025_end_mask_0 = const()[name = tensor("op_8025_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8025_cast_fp16 = slice_by_index(begin = var_8025_begin_0, end = var_8025_end_0, end_mask = var_8025_end_mask_0, x = var_7971_cast_fp16)[name = tensor("op_8025_cast_fp16")]; tensor var_8026_begin_0 = const()[name = tensor("op_8026_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8026_end_0 = const()[name = tensor("op_8026_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8026_end_mask_0 = const()[name = tensor("op_8026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8026_cast_fp16 = slice_by_index(begin = var_8026_begin_0, end = var_8026_end_0, end_mask = var_8026_end_mask_0, x = var_7971_cast_fp16)[name = tensor("op_8026_cast_fp16")]; tensor var_8027_begin_0 = const()[name = tensor("op_8027_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8027_end_0 = const()[name = tensor("op_8027_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8027_end_mask_0 = const()[name = tensor("op_8027_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8027_cast_fp16 = slice_by_index(begin = var_8027_begin_0, end = var_8027_end_0, end_mask = var_8027_end_mask_0, x = var_7971_cast_fp16)[name = tensor("op_8027_cast_fp16")]; tensor var_8028_begin_0 = const()[name = tensor("op_8028_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8028_end_0 = const()[name = tensor("op_8028_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8028_end_mask_0 = const()[name = tensor("op_8028_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8028_cast_fp16 = slice_by_index(begin = var_8028_begin_0, end = var_8028_end_0, end_mask = var_8028_end_mask_0, x = var_7975_cast_fp16)[name = tensor("op_8028_cast_fp16")]; tensor var_8029_begin_0 = const()[name = tensor("op_8029_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8029_end_0 = const()[name = tensor("op_8029_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8029_end_mask_0 = const()[name = tensor("op_8029_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8029_cast_fp16 = slice_by_index(begin = var_8029_begin_0, end = var_8029_end_0, end_mask = var_8029_end_mask_0, x = var_7975_cast_fp16)[name = tensor("op_8029_cast_fp16")]; tensor var_8030_begin_0 = const()[name = tensor("op_8030_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8030_end_0 = const()[name = tensor("op_8030_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8030_end_mask_0 = const()[name = tensor("op_8030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8030_cast_fp16 = slice_by_index(begin = var_8030_begin_0, end = var_8030_end_0, end_mask = var_8030_end_mask_0, x = var_7975_cast_fp16)[name = tensor("op_8030_cast_fp16")]; tensor var_8031_begin_0 = const()[name = tensor("op_8031_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8031_end_0 = const()[name = tensor("op_8031_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8031_end_mask_0 = const()[name = tensor("op_8031_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8031_cast_fp16 = slice_by_index(begin = var_8031_begin_0, end = var_8031_end_0, end_mask = var_8031_end_mask_0, x = var_7975_cast_fp16)[name = tensor("op_8031_cast_fp16")]; tensor var_8032_begin_0 = const()[name = tensor("op_8032_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8032_end_0 = const()[name = tensor("op_8032_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8032_end_mask_0 = const()[name = tensor("op_8032_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8032_cast_fp16 = slice_by_index(begin = var_8032_begin_0, end = var_8032_end_0, end_mask = var_8032_end_mask_0, x = var_7975_cast_fp16)[name = tensor("op_8032_cast_fp16")]; tensor var_8033_begin_0 = const()[name = tensor("op_8033_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8033_end_0 = const()[name = tensor("op_8033_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8033_end_mask_0 = const()[name = tensor("op_8033_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8033_cast_fp16 = slice_by_index(begin = var_8033_begin_0, end = var_8033_end_0, end_mask = var_8033_end_mask_0, x = var_7975_cast_fp16)[name = tensor("op_8033_cast_fp16")]; tensor var_8034_begin_0 = const()[name = tensor("op_8034_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8034_end_0 = const()[name = tensor("op_8034_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8034_end_mask_0 = const()[name = tensor("op_8034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8034_cast_fp16 = slice_by_index(begin = var_8034_begin_0, end = var_8034_end_0, end_mask = var_8034_end_mask_0, x = var_7979_cast_fp16)[name = tensor("op_8034_cast_fp16")]; tensor var_8035_begin_0 = const()[name = tensor("op_8035_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8035_end_0 = const()[name = tensor("op_8035_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8035_end_mask_0 = const()[name = tensor("op_8035_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8035_cast_fp16 = slice_by_index(begin = var_8035_begin_0, end = var_8035_end_0, end_mask = var_8035_end_mask_0, x = var_7979_cast_fp16)[name = tensor("op_8035_cast_fp16")]; tensor var_8036_begin_0 = const()[name = tensor("op_8036_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8036_end_0 = const()[name = tensor("op_8036_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8036_end_mask_0 = const()[name = tensor("op_8036_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8036_cast_fp16 = slice_by_index(begin = var_8036_begin_0, end = var_8036_end_0, end_mask = var_8036_end_mask_0, x = var_7979_cast_fp16)[name = tensor("op_8036_cast_fp16")]; tensor var_8037_begin_0 = const()[name = tensor("op_8037_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8037_end_0 = const()[name = tensor("op_8037_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8037_end_mask_0 = const()[name = tensor("op_8037_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8037_cast_fp16 = slice_by_index(begin = var_8037_begin_0, end = var_8037_end_0, end_mask = var_8037_end_mask_0, x = var_7979_cast_fp16)[name = tensor("op_8037_cast_fp16")]; tensor var_8038_begin_0 = const()[name = tensor("op_8038_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8038_end_0 = const()[name = tensor("op_8038_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8038_end_mask_0 = const()[name = tensor("op_8038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8038_cast_fp16 = slice_by_index(begin = var_8038_begin_0, end = var_8038_end_0, end_mask = var_8038_end_mask_0, x = var_7979_cast_fp16)[name = tensor("op_8038_cast_fp16")]; tensor var_8039_begin_0 = const()[name = tensor("op_8039_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8039_end_0 = const()[name = tensor("op_8039_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8039_end_mask_0 = const()[name = tensor("op_8039_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8039_cast_fp16 = slice_by_index(begin = var_8039_begin_0, end = var_8039_end_0, end_mask = var_8039_end_mask_0, x = var_7979_cast_fp16)[name = tensor("op_8039_cast_fp16")]; tensor var_8040_begin_0 = const()[name = tensor("op_8040_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8040_end_0 = const()[name = tensor("op_8040_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8040_end_mask_0 = const()[name = tensor("op_8040_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8040_cast_fp16 = slice_by_index(begin = var_8040_begin_0, end = var_8040_end_0, end_mask = var_8040_end_mask_0, x = var_7983_cast_fp16)[name = tensor("op_8040_cast_fp16")]; tensor var_8041_begin_0 = const()[name = tensor("op_8041_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8041_end_0 = const()[name = tensor("op_8041_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8041_end_mask_0 = const()[name = tensor("op_8041_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8041_cast_fp16 = slice_by_index(begin = var_8041_begin_0, end = var_8041_end_0, end_mask = var_8041_end_mask_0, x = var_7983_cast_fp16)[name = tensor("op_8041_cast_fp16")]; tensor var_8042_begin_0 = const()[name = tensor("op_8042_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8042_end_0 = const()[name = tensor("op_8042_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8042_end_mask_0 = const()[name = tensor("op_8042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8042_cast_fp16 = slice_by_index(begin = var_8042_begin_0, end = var_8042_end_0, end_mask = var_8042_end_mask_0, x = var_7983_cast_fp16)[name = tensor("op_8042_cast_fp16")]; tensor var_8043_begin_0 = const()[name = tensor("op_8043_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8043_end_0 = const()[name = tensor("op_8043_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8043_end_mask_0 = const()[name = tensor("op_8043_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8043_cast_fp16 = slice_by_index(begin = var_8043_begin_0, end = var_8043_end_0, end_mask = var_8043_end_mask_0, x = var_7983_cast_fp16)[name = tensor("op_8043_cast_fp16")]; tensor var_8044_begin_0 = const()[name = tensor("op_8044_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8044_end_0 = const()[name = tensor("op_8044_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8044_end_mask_0 = const()[name = tensor("op_8044_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8044_cast_fp16 = slice_by_index(begin = var_8044_begin_0, end = var_8044_end_0, end_mask = var_8044_end_mask_0, x = var_7983_cast_fp16)[name = tensor("op_8044_cast_fp16")]; tensor var_8045_begin_0 = const()[name = tensor("op_8045_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8045_end_0 = const()[name = tensor("op_8045_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8045_end_mask_0 = const()[name = tensor("op_8045_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8045_cast_fp16 = slice_by_index(begin = var_8045_begin_0, end = var_8045_end_0, end_mask = var_8045_end_mask_0, x = var_7983_cast_fp16)[name = tensor("op_8045_cast_fp16")]; tensor var_8046_begin_0 = const()[name = tensor("op_8046_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8046_end_0 = const()[name = tensor("op_8046_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8046_end_mask_0 = const()[name = tensor("op_8046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8046_cast_fp16 = slice_by_index(begin = var_8046_begin_0, end = var_8046_end_0, end_mask = var_8046_end_mask_0, x = var_7987_cast_fp16)[name = tensor("op_8046_cast_fp16")]; tensor var_8047_begin_0 = const()[name = tensor("op_8047_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8047_end_0 = const()[name = tensor("op_8047_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8047_end_mask_0 = const()[name = tensor("op_8047_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8047_cast_fp16 = slice_by_index(begin = var_8047_begin_0, end = var_8047_end_0, end_mask = var_8047_end_mask_0, x = var_7987_cast_fp16)[name = tensor("op_8047_cast_fp16")]; tensor var_8048_begin_0 = const()[name = tensor("op_8048_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8048_end_0 = const()[name = tensor("op_8048_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8048_end_mask_0 = const()[name = tensor("op_8048_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8048_cast_fp16 = slice_by_index(begin = var_8048_begin_0, end = var_8048_end_0, end_mask = var_8048_end_mask_0, x = var_7987_cast_fp16)[name = tensor("op_8048_cast_fp16")]; tensor var_8049_begin_0 = const()[name = tensor("op_8049_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8049_end_0 = const()[name = tensor("op_8049_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8049_end_mask_0 = const()[name = tensor("op_8049_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8049_cast_fp16 = slice_by_index(begin = var_8049_begin_0, end = var_8049_end_0, end_mask = var_8049_end_mask_0, x = var_7987_cast_fp16)[name = tensor("op_8049_cast_fp16")]; tensor var_8050_begin_0 = const()[name = tensor("op_8050_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8050_end_0 = const()[name = tensor("op_8050_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8050_end_mask_0 = const()[name = tensor("op_8050_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8050_cast_fp16 = slice_by_index(begin = var_8050_begin_0, end = var_8050_end_0, end_mask = var_8050_end_mask_0, x = var_7987_cast_fp16)[name = tensor("op_8050_cast_fp16")]; tensor var_8051_begin_0 = const()[name = tensor("op_8051_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8051_end_0 = const()[name = tensor("op_8051_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8051_end_mask_0 = const()[name = tensor("op_8051_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8051_cast_fp16 = slice_by_index(begin = var_8051_begin_0, end = var_8051_end_0, end_mask = var_8051_end_mask_0, x = var_7987_cast_fp16)[name = tensor("op_8051_cast_fp16")]; tensor var_8052_begin_0 = const()[name = tensor("op_8052_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8052_end_0 = const()[name = tensor("op_8052_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8052_end_mask_0 = const()[name = tensor("op_8052_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8052_cast_fp16 = slice_by_index(begin = var_8052_begin_0, end = var_8052_end_0, end_mask = var_8052_end_mask_0, x = var_7991_cast_fp16)[name = tensor("op_8052_cast_fp16")]; tensor var_8053_begin_0 = const()[name = tensor("op_8053_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8053_end_0 = const()[name = tensor("op_8053_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8053_end_mask_0 = const()[name = tensor("op_8053_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8053_cast_fp16 = slice_by_index(begin = var_8053_begin_0, end = var_8053_end_0, end_mask = var_8053_end_mask_0, x = var_7991_cast_fp16)[name = tensor("op_8053_cast_fp16")]; tensor var_8054_begin_0 = const()[name = tensor("op_8054_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8054_end_0 = const()[name = tensor("op_8054_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8054_end_mask_0 = const()[name = tensor("op_8054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8054_cast_fp16 = slice_by_index(begin = var_8054_begin_0, end = var_8054_end_0, end_mask = var_8054_end_mask_0, x = var_7991_cast_fp16)[name = tensor("op_8054_cast_fp16")]; tensor var_8055_begin_0 = const()[name = tensor("op_8055_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8055_end_0 = const()[name = tensor("op_8055_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8055_end_mask_0 = const()[name = tensor("op_8055_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8055_cast_fp16 = slice_by_index(begin = var_8055_begin_0, end = var_8055_end_0, end_mask = var_8055_end_mask_0, x = var_7991_cast_fp16)[name = tensor("op_8055_cast_fp16")]; tensor var_8056_begin_0 = const()[name = tensor("op_8056_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8056_end_0 = const()[name = tensor("op_8056_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8056_end_mask_0 = const()[name = tensor("op_8056_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8056_cast_fp16 = slice_by_index(begin = var_8056_begin_0, end = var_8056_end_0, end_mask = var_8056_end_mask_0, x = var_7991_cast_fp16)[name = tensor("op_8056_cast_fp16")]; tensor var_8057_begin_0 = const()[name = tensor("op_8057_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8057_end_0 = const()[name = tensor("op_8057_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8057_end_mask_0 = const()[name = tensor("op_8057_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8057_cast_fp16 = slice_by_index(begin = var_8057_begin_0, end = var_8057_end_0, end_mask = var_8057_end_mask_0, x = var_7991_cast_fp16)[name = tensor("op_8057_cast_fp16")]; tensor var_8058_begin_0 = const()[name = tensor("op_8058_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8058_end_0 = const()[name = tensor("op_8058_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8058_end_mask_0 = const()[name = tensor("op_8058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8058_cast_fp16 = slice_by_index(begin = var_8058_begin_0, end = var_8058_end_0, end_mask = var_8058_end_mask_0, x = var_7995_cast_fp16)[name = tensor("op_8058_cast_fp16")]; tensor var_8059_begin_0 = const()[name = tensor("op_8059_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8059_end_0 = const()[name = tensor("op_8059_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8059_end_mask_0 = const()[name = tensor("op_8059_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8059_cast_fp16 = slice_by_index(begin = var_8059_begin_0, end = var_8059_end_0, end_mask = var_8059_end_mask_0, x = var_7995_cast_fp16)[name = tensor("op_8059_cast_fp16")]; tensor var_8060_begin_0 = const()[name = tensor("op_8060_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8060_end_0 = const()[name = tensor("op_8060_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8060_end_mask_0 = const()[name = tensor("op_8060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8060_cast_fp16 = slice_by_index(begin = var_8060_begin_0, end = var_8060_end_0, end_mask = var_8060_end_mask_0, x = var_7995_cast_fp16)[name = tensor("op_8060_cast_fp16")]; tensor var_8061_begin_0 = const()[name = tensor("op_8061_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8061_end_0 = const()[name = tensor("op_8061_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8061_end_mask_0 = const()[name = tensor("op_8061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8061_cast_fp16 = slice_by_index(begin = var_8061_begin_0, end = var_8061_end_0, end_mask = var_8061_end_mask_0, x = var_7995_cast_fp16)[name = tensor("op_8061_cast_fp16")]; tensor var_8062_begin_0 = const()[name = tensor("op_8062_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8062_end_0 = const()[name = tensor("op_8062_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8062_end_mask_0 = const()[name = tensor("op_8062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8062_cast_fp16 = slice_by_index(begin = var_8062_begin_0, end = var_8062_end_0, end_mask = var_8062_end_mask_0, x = var_7995_cast_fp16)[name = tensor("op_8062_cast_fp16")]; tensor var_8063_begin_0 = const()[name = tensor("op_8063_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8063_end_0 = const()[name = tensor("op_8063_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8063_end_mask_0 = const()[name = tensor("op_8063_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8063_cast_fp16 = slice_by_index(begin = var_8063_begin_0, end = var_8063_end_0, end_mask = var_8063_end_mask_0, x = var_7995_cast_fp16)[name = tensor("op_8063_cast_fp16")]; tensor var_8064_begin_0 = const()[name = tensor("op_8064_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8064_end_0 = const()[name = tensor("op_8064_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8064_end_mask_0 = const()[name = tensor("op_8064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8064_cast_fp16 = slice_by_index(begin = var_8064_begin_0, end = var_8064_end_0, end_mask = var_8064_end_mask_0, x = var_7999_cast_fp16)[name = tensor("op_8064_cast_fp16")]; tensor var_8065_begin_0 = const()[name = tensor("op_8065_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8065_end_0 = const()[name = tensor("op_8065_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8065_end_mask_0 = const()[name = tensor("op_8065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8065_cast_fp16 = slice_by_index(begin = var_8065_begin_0, end = var_8065_end_0, end_mask = var_8065_end_mask_0, x = var_7999_cast_fp16)[name = tensor("op_8065_cast_fp16")]; tensor var_8066_begin_0 = const()[name = tensor("op_8066_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8066_end_0 = const()[name = tensor("op_8066_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8066_end_mask_0 = const()[name = tensor("op_8066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8066_cast_fp16 = slice_by_index(begin = var_8066_begin_0, end = var_8066_end_0, end_mask = var_8066_end_mask_0, x = var_7999_cast_fp16)[name = tensor("op_8066_cast_fp16")]; tensor var_8067_begin_0 = const()[name = tensor("op_8067_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8067_end_0 = const()[name = tensor("op_8067_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8067_end_mask_0 = const()[name = tensor("op_8067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8067_cast_fp16 = slice_by_index(begin = var_8067_begin_0, end = var_8067_end_0, end_mask = var_8067_end_mask_0, x = var_7999_cast_fp16)[name = tensor("op_8067_cast_fp16")]; tensor var_8068_begin_0 = const()[name = tensor("op_8068_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8068_end_0 = const()[name = tensor("op_8068_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8068_end_mask_0 = const()[name = tensor("op_8068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8068_cast_fp16 = slice_by_index(begin = var_8068_begin_0, end = var_8068_end_0, end_mask = var_8068_end_mask_0, x = var_7999_cast_fp16)[name = tensor("op_8068_cast_fp16")]; tensor var_8069_begin_0 = const()[name = tensor("op_8069_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8069_end_0 = const()[name = tensor("op_8069_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8069_end_mask_0 = const()[name = tensor("op_8069_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8069_cast_fp16 = slice_by_index(begin = var_8069_begin_0, end = var_8069_end_0, end_mask = var_8069_end_mask_0, x = var_7999_cast_fp16)[name = tensor("op_8069_cast_fp16")]; tensor var_8070_begin_0 = const()[name = tensor("op_8070_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8070_end_0 = const()[name = tensor("op_8070_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8070_end_mask_0 = const()[name = tensor("op_8070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8070_cast_fp16 = slice_by_index(begin = var_8070_begin_0, end = var_8070_end_0, end_mask = var_8070_end_mask_0, x = var_8003_cast_fp16)[name = tensor("op_8070_cast_fp16")]; tensor var_8071_begin_0 = const()[name = tensor("op_8071_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8071_end_0 = const()[name = tensor("op_8071_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8071_end_mask_0 = const()[name = tensor("op_8071_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8071_cast_fp16 = slice_by_index(begin = var_8071_begin_0, end = var_8071_end_0, end_mask = var_8071_end_mask_0, x = var_8003_cast_fp16)[name = tensor("op_8071_cast_fp16")]; tensor var_8072_begin_0 = const()[name = tensor("op_8072_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8072_end_0 = const()[name = tensor("op_8072_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8072_end_mask_0 = const()[name = tensor("op_8072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8072_cast_fp16 = slice_by_index(begin = var_8072_begin_0, end = var_8072_end_0, end_mask = var_8072_end_mask_0, x = var_8003_cast_fp16)[name = tensor("op_8072_cast_fp16")]; tensor var_8073_begin_0 = const()[name = tensor("op_8073_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8073_end_0 = const()[name = tensor("op_8073_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8073_end_mask_0 = const()[name = tensor("op_8073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8073_cast_fp16 = slice_by_index(begin = var_8073_begin_0, end = var_8073_end_0, end_mask = var_8073_end_mask_0, x = var_8003_cast_fp16)[name = tensor("op_8073_cast_fp16")]; tensor var_8074_begin_0 = const()[name = tensor("op_8074_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8074_end_0 = const()[name = tensor("op_8074_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8074_end_mask_0 = const()[name = tensor("op_8074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8074_cast_fp16 = slice_by_index(begin = var_8074_begin_0, end = var_8074_end_0, end_mask = var_8074_end_mask_0, x = var_8003_cast_fp16)[name = tensor("op_8074_cast_fp16")]; tensor var_8075_begin_0 = const()[name = tensor("op_8075_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8075_end_0 = const()[name = tensor("op_8075_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8075_end_mask_0 = const()[name = tensor("op_8075_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8075_cast_fp16 = slice_by_index(begin = var_8075_begin_0, end = var_8075_end_0, end_mask = var_8075_end_mask_0, x = var_8003_cast_fp16)[name = tensor("op_8075_cast_fp16")]; tensor var_8076_begin_0 = const()[name = tensor("op_8076_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8076_end_0 = const()[name = tensor("op_8076_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8076_end_mask_0 = const()[name = tensor("op_8076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8076_cast_fp16 = slice_by_index(begin = var_8076_begin_0, end = var_8076_end_0, end_mask = var_8076_end_mask_0, x = var_8007_cast_fp16)[name = tensor("op_8076_cast_fp16")]; tensor var_8077_begin_0 = const()[name = tensor("op_8077_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8077_end_0 = const()[name = tensor("op_8077_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8077_end_mask_0 = const()[name = tensor("op_8077_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8077_cast_fp16 = slice_by_index(begin = var_8077_begin_0, end = var_8077_end_0, end_mask = var_8077_end_mask_0, x = var_8007_cast_fp16)[name = tensor("op_8077_cast_fp16")]; tensor var_8078_begin_0 = const()[name = tensor("op_8078_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8078_end_0 = const()[name = tensor("op_8078_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8078_end_mask_0 = const()[name = tensor("op_8078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8078_cast_fp16 = slice_by_index(begin = var_8078_begin_0, end = var_8078_end_0, end_mask = var_8078_end_mask_0, x = var_8007_cast_fp16)[name = tensor("op_8078_cast_fp16")]; tensor var_8079_begin_0 = const()[name = tensor("op_8079_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8079_end_0 = const()[name = tensor("op_8079_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8079_end_mask_0 = const()[name = tensor("op_8079_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8079_cast_fp16 = slice_by_index(begin = var_8079_begin_0, end = var_8079_end_0, end_mask = var_8079_end_mask_0, x = var_8007_cast_fp16)[name = tensor("op_8079_cast_fp16")]; tensor var_8080_begin_0 = const()[name = tensor("op_8080_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8080_end_0 = const()[name = tensor("op_8080_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8080_end_mask_0 = const()[name = tensor("op_8080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8080_cast_fp16 = slice_by_index(begin = var_8080_begin_0, end = var_8080_end_0, end_mask = var_8080_end_mask_0, x = var_8007_cast_fp16)[name = tensor("op_8080_cast_fp16")]; tensor var_8081_begin_0 = const()[name = tensor("op_8081_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8081_end_0 = const()[name = tensor("op_8081_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8081_end_mask_0 = const()[name = tensor("op_8081_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8081_cast_fp16 = slice_by_index(begin = var_8081_begin_0, end = var_8081_end_0, end_mask = var_8081_end_mask_0, x = var_8007_cast_fp16)[name = tensor("op_8081_cast_fp16")]; tensor k_19_perm_0 = const()[name = tensor("k_19_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_8086_begin_0 = const()[name = tensor("op_8086_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8086_end_0 = const()[name = tensor("op_8086_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_8086_end_mask_0 = const()[name = tensor("op_8086_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = key_19_cast_fp16)[name = tensor("transpose_2")]; tensor var_8086_cast_fp16 = slice_by_index(begin = var_8086_begin_0, end = var_8086_end_0, end_mask = var_8086_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8086_cast_fp16")]; tensor var_8090_begin_0 = const()[name = tensor("op_8090_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_8090_end_0 = const()[name = tensor("op_8090_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_8090_end_mask_0 = const()[name = tensor("op_8090_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8090_cast_fp16 = slice_by_index(begin = var_8090_begin_0, end = var_8090_end_0, end_mask = var_8090_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8090_cast_fp16")]; tensor var_8094_begin_0 = const()[name = tensor("op_8094_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_8094_end_0 = const()[name = tensor("op_8094_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_8094_end_mask_0 = const()[name = tensor("op_8094_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8094_cast_fp16 = slice_by_index(begin = var_8094_begin_0, end = var_8094_end_0, end_mask = var_8094_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8094_cast_fp16")]; tensor var_8098_begin_0 = const()[name = tensor("op_8098_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_8098_end_0 = const()[name = tensor("op_8098_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_8098_end_mask_0 = const()[name = tensor("op_8098_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8098_cast_fp16 = slice_by_index(begin = var_8098_begin_0, end = var_8098_end_0, end_mask = var_8098_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8098_cast_fp16")]; tensor var_8102_begin_0 = const()[name = tensor("op_8102_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8102_end_0 = const()[name = tensor("op_8102_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_8102_end_mask_0 = const()[name = tensor("op_8102_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8102_cast_fp16 = slice_by_index(begin = var_8102_begin_0, end = var_8102_end_0, end_mask = var_8102_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8102_cast_fp16")]; tensor var_8106_begin_0 = const()[name = tensor("op_8106_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_8106_end_0 = const()[name = tensor("op_8106_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_8106_end_mask_0 = const()[name = tensor("op_8106_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8106_cast_fp16 = slice_by_index(begin = var_8106_begin_0, end = var_8106_end_0, end_mask = var_8106_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8106_cast_fp16")]; tensor var_8110_begin_0 = const()[name = tensor("op_8110_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_8110_end_0 = const()[name = tensor("op_8110_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_8110_end_mask_0 = const()[name = tensor("op_8110_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8110_cast_fp16 = slice_by_index(begin = var_8110_begin_0, end = var_8110_end_0, end_mask = var_8110_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8110_cast_fp16")]; tensor var_8114_begin_0 = const()[name = tensor("op_8114_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_8114_end_0 = const()[name = tensor("op_8114_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_8114_end_mask_0 = const()[name = tensor("op_8114_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8114_cast_fp16 = slice_by_index(begin = var_8114_begin_0, end = var_8114_end_0, end_mask = var_8114_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8114_cast_fp16")]; tensor var_8118_begin_0 = const()[name = tensor("op_8118_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8118_end_0 = const()[name = tensor("op_8118_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_8118_end_mask_0 = const()[name = tensor("op_8118_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8118_cast_fp16 = slice_by_index(begin = var_8118_begin_0, end = var_8118_end_0, end_mask = var_8118_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8118_cast_fp16")]; tensor var_8122_begin_0 = const()[name = tensor("op_8122_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_8122_end_0 = const()[name = tensor("op_8122_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_8122_end_mask_0 = const()[name = tensor("op_8122_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8122_cast_fp16 = slice_by_index(begin = var_8122_begin_0, end = var_8122_end_0, end_mask = var_8122_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8122_cast_fp16")]; tensor var_8126_begin_0 = const()[name = tensor("op_8126_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_8126_end_0 = const()[name = tensor("op_8126_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_8126_end_mask_0 = const()[name = tensor("op_8126_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8126_cast_fp16 = slice_by_index(begin = var_8126_begin_0, end = var_8126_end_0, end_mask = var_8126_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8126_cast_fp16")]; tensor var_8130_begin_0 = const()[name = tensor("op_8130_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_8130_end_0 = const()[name = tensor("op_8130_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_8130_end_mask_0 = const()[name = tensor("op_8130_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8130_cast_fp16 = slice_by_index(begin = var_8130_begin_0, end = var_8130_end_0, end_mask = var_8130_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_8130_cast_fp16")]; tensor var_8132_begin_0 = const()[name = tensor("op_8132_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8132_end_0 = const()[name = tensor("op_8132_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8132_end_mask_0 = const()[name = tensor("op_8132_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8132_cast_fp16 = slice_by_index(begin = var_8132_begin_0, end = var_8132_end_0, end_mask = var_8132_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8132_cast_fp16")]; tensor var_8136_begin_0 = const()[name = tensor("op_8136_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_8136_end_0 = const()[name = tensor("op_8136_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_8136_end_mask_0 = const()[name = tensor("op_8136_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8136_cast_fp16 = slice_by_index(begin = var_8136_begin_0, end = var_8136_end_0, end_mask = var_8136_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8136_cast_fp16")]; tensor var_8140_begin_0 = const()[name = tensor("op_8140_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_8140_end_0 = const()[name = tensor("op_8140_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_8140_end_mask_0 = const()[name = tensor("op_8140_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8140_cast_fp16 = slice_by_index(begin = var_8140_begin_0, end = var_8140_end_0, end_mask = var_8140_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8140_cast_fp16")]; tensor var_8144_begin_0 = const()[name = tensor("op_8144_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_8144_end_0 = const()[name = tensor("op_8144_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_8144_end_mask_0 = const()[name = tensor("op_8144_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8144_cast_fp16 = slice_by_index(begin = var_8144_begin_0, end = var_8144_end_0, end_mask = var_8144_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8144_cast_fp16")]; tensor var_8148_begin_0 = const()[name = tensor("op_8148_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_8148_end_0 = const()[name = tensor("op_8148_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_8148_end_mask_0 = const()[name = tensor("op_8148_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8148_cast_fp16 = slice_by_index(begin = var_8148_begin_0, end = var_8148_end_0, end_mask = var_8148_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8148_cast_fp16")]; tensor var_8152_begin_0 = const()[name = tensor("op_8152_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8152_end_0 = const()[name = tensor("op_8152_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_8152_end_mask_0 = const()[name = tensor("op_8152_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8152_cast_fp16 = slice_by_index(begin = var_8152_begin_0, end = var_8152_end_0, end_mask = var_8152_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8152_cast_fp16")]; tensor var_8156_begin_0 = const()[name = tensor("op_8156_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_8156_end_0 = const()[name = tensor("op_8156_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_8156_end_mask_0 = const()[name = tensor("op_8156_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8156_cast_fp16 = slice_by_index(begin = var_8156_begin_0, end = var_8156_end_0, end_mask = var_8156_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8156_cast_fp16")]; tensor var_8160_begin_0 = const()[name = tensor("op_8160_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_8160_end_0 = const()[name = tensor("op_8160_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_8160_end_mask_0 = const()[name = tensor("op_8160_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8160_cast_fp16 = slice_by_index(begin = var_8160_begin_0, end = var_8160_end_0, end_mask = var_8160_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8160_cast_fp16")]; tensor var_8164_begin_0 = const()[name = tensor("op_8164_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_8164_end_0 = const()[name = tensor("op_8164_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_8164_end_mask_0 = const()[name = tensor("op_8164_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8164_cast_fp16 = slice_by_index(begin = var_8164_begin_0, end = var_8164_end_0, end_mask = var_8164_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8164_cast_fp16")]; tensor var_8168_begin_0 = const()[name = tensor("op_8168_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_8168_end_0 = const()[name = tensor("op_8168_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_8168_end_mask_0 = const()[name = tensor("op_8168_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8168_cast_fp16 = slice_by_index(begin = var_8168_begin_0, end = var_8168_end_0, end_mask = var_8168_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8168_cast_fp16")]; tensor var_8172_begin_0 = const()[name = tensor("op_8172_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_8172_end_0 = const()[name = tensor("op_8172_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_8172_end_mask_0 = const()[name = tensor("op_8172_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8172_cast_fp16 = slice_by_index(begin = var_8172_begin_0, end = var_8172_end_0, end_mask = var_8172_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8172_cast_fp16")]; tensor var_8176_begin_0 = const()[name = tensor("op_8176_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_8176_end_0 = const()[name = tensor("op_8176_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_8176_end_mask_0 = const()[name = tensor("op_8176_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8176_cast_fp16 = slice_by_index(begin = var_8176_begin_0, end = var_8176_end_0, end_mask = var_8176_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_8176_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1297_equation_0, values = (var_8086_cast_fp16, var_8010_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1299_equation_0, values = (var_8086_cast_fp16, var_8011_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1301_equation_0, values = (var_8086_cast_fp16, var_8012_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1303_equation_0, values = (var_8086_cast_fp16, var_8013_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1305_equation_0, values = (var_8086_cast_fp16, var_8014_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1307_equation_0, values = (var_8086_cast_fp16, var_8015_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1309_equation_0, values = (var_8090_cast_fp16, var_8016_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1311_equation_0, values = (var_8090_cast_fp16, var_8017_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1313_equation_0, values = (var_8090_cast_fp16, var_8018_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1315_equation_0, values = (var_8090_cast_fp16, var_8019_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1317_equation_0, values = (var_8090_cast_fp16, var_8020_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1319_equation_0, values = (var_8090_cast_fp16, var_8021_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1319_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1321_equation_0, values = (var_8094_cast_fp16, var_8022_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1323_equation_0, values = (var_8094_cast_fp16, var_8023_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1325_equation_0, values = (var_8094_cast_fp16, var_8024_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1327_equation_0, values = (var_8094_cast_fp16, var_8025_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1329_equation_0, values = (var_8094_cast_fp16, var_8026_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1331_equation_0, values = (var_8094_cast_fp16, var_8027_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1333_equation_0, values = (var_8098_cast_fp16, var_8028_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1335_equation_0, values = (var_8098_cast_fp16, var_8029_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1337_equation_0, values = (var_8098_cast_fp16, var_8030_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1339_equation_0, values = (var_8098_cast_fp16, var_8031_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1341_equation_0, values = (var_8098_cast_fp16, var_8032_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1343_equation_0, values = (var_8098_cast_fp16, var_8033_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1345_equation_0, values = (var_8102_cast_fp16, var_8034_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1347_equation_0, values = (var_8102_cast_fp16, var_8035_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1349_equation_0, values = (var_8102_cast_fp16, var_8036_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1351_equation_0, values = (var_8102_cast_fp16, var_8037_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1353_equation_0, values = (var_8102_cast_fp16, var_8038_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1355_equation_0, values = (var_8102_cast_fp16, var_8039_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1357_equation_0, values = (var_8106_cast_fp16, var_8040_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1359_equation_0, values = (var_8106_cast_fp16, var_8041_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1361_equation_0, values = (var_8106_cast_fp16, var_8042_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1363_equation_0, values = (var_8106_cast_fp16, var_8043_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1365_equation_0, values = (var_8106_cast_fp16, var_8044_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1367_equation_0, values = (var_8106_cast_fp16, var_8045_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1369_equation_0, values = (var_8110_cast_fp16, var_8046_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1371_equation_0, values = (var_8110_cast_fp16, var_8047_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1373_equation_0, values = (var_8110_cast_fp16, var_8048_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1375_equation_0, values = (var_8110_cast_fp16, var_8049_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1377_equation_0, values = (var_8110_cast_fp16, var_8050_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1379_equation_0, values = (var_8110_cast_fp16, var_8051_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1381_equation_0, values = (var_8114_cast_fp16, var_8052_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1383_equation_0, values = (var_8114_cast_fp16, var_8053_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1385_equation_0, values = (var_8114_cast_fp16, var_8054_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1387_equation_0, values = (var_8114_cast_fp16, var_8055_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1389_equation_0, values = (var_8114_cast_fp16, var_8056_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1391_equation_0, values = (var_8114_cast_fp16, var_8057_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1393_equation_0, values = (var_8118_cast_fp16, var_8058_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1395_equation_0, values = (var_8118_cast_fp16, var_8059_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1397_equation_0, values = (var_8118_cast_fp16, var_8060_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1399_equation_0, values = (var_8118_cast_fp16, var_8061_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1401_equation_0, values = (var_8118_cast_fp16, var_8062_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1403_equation_0, values = (var_8118_cast_fp16, var_8063_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1405_equation_0, values = (var_8122_cast_fp16, var_8064_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1407_equation_0, values = (var_8122_cast_fp16, var_8065_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1409_equation_0, values = (var_8122_cast_fp16, var_8066_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1411_equation_0, values = (var_8122_cast_fp16, var_8067_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1413_equation_0, values = (var_8122_cast_fp16, var_8068_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1415_equation_0, values = (var_8122_cast_fp16, var_8069_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1417_equation_0, values = (var_8126_cast_fp16, var_8070_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1419_equation_0, values = (var_8126_cast_fp16, var_8071_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1421_equation_0, values = (var_8126_cast_fp16, var_8072_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1423_equation_0, values = (var_8126_cast_fp16, var_8073_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1425_equation_0, values = (var_8126_cast_fp16, var_8074_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1427_equation_0, values = (var_8126_cast_fp16, var_8075_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1429_equation_0, values = (var_8130_cast_fp16, var_8076_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1431_equation_0, values = (var_8130_cast_fp16, var_8077_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1433_equation_0, values = (var_8130_cast_fp16, var_8078_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1435_equation_0, values = (var_8130_cast_fp16, var_8079_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1437_equation_0, values = (var_8130_cast_fp16, var_8080_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1439_equation_0, values = (var_8130_cast_fp16, var_8081_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1439_cast_fp16")]; tensor var_8323_to_fp16 = const()[name = tensor("op_8323_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1297_cast_fp16, y = var_8323_to_fp16)[name = tensor("aw_chunk_1297_cast_fp16")]; tensor var_8325_to_fp16 = const()[name = tensor("op_8325_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1299_cast_fp16, y = var_8325_to_fp16)[name = tensor("aw_chunk_1299_cast_fp16")]; tensor var_8327_to_fp16 = const()[name = tensor("op_8327_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1301_cast_fp16, y = var_8327_to_fp16)[name = tensor("aw_chunk_1301_cast_fp16")]; tensor var_8329_to_fp16 = const()[name = tensor("op_8329_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1303_cast_fp16, y = var_8329_to_fp16)[name = tensor("aw_chunk_1303_cast_fp16")]; tensor var_8331_to_fp16 = const()[name = tensor("op_8331_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1305_cast_fp16, y = var_8331_to_fp16)[name = tensor("aw_chunk_1305_cast_fp16")]; tensor var_8333_to_fp16 = const()[name = tensor("op_8333_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1307_cast_fp16, y = var_8333_to_fp16)[name = tensor("aw_chunk_1307_cast_fp16")]; tensor var_8335_to_fp16 = const()[name = tensor("op_8335_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1309_cast_fp16, y = var_8335_to_fp16)[name = tensor("aw_chunk_1309_cast_fp16")]; tensor var_8337_to_fp16 = const()[name = tensor("op_8337_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1311_cast_fp16, y = var_8337_to_fp16)[name = tensor("aw_chunk_1311_cast_fp16")]; tensor var_8339_to_fp16 = const()[name = tensor("op_8339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1313_cast_fp16, y = var_8339_to_fp16)[name = tensor("aw_chunk_1313_cast_fp16")]; tensor var_8341_to_fp16 = const()[name = tensor("op_8341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1315_cast_fp16, y = var_8341_to_fp16)[name = tensor("aw_chunk_1315_cast_fp16")]; tensor var_8343_to_fp16 = const()[name = tensor("op_8343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1317_cast_fp16, y = var_8343_to_fp16)[name = tensor("aw_chunk_1317_cast_fp16")]; tensor var_8345_to_fp16 = const()[name = tensor("op_8345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1319_cast_fp16, y = var_8345_to_fp16)[name = tensor("aw_chunk_1319_cast_fp16")]; tensor var_8347_to_fp16 = const()[name = tensor("op_8347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1321_cast_fp16, y = var_8347_to_fp16)[name = tensor("aw_chunk_1321_cast_fp16")]; tensor var_8349_to_fp16 = const()[name = tensor("op_8349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1323_cast_fp16, y = var_8349_to_fp16)[name = tensor("aw_chunk_1323_cast_fp16")]; tensor var_8351_to_fp16 = const()[name = tensor("op_8351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1325_cast_fp16, y = var_8351_to_fp16)[name = tensor("aw_chunk_1325_cast_fp16")]; tensor var_8353_to_fp16 = const()[name = tensor("op_8353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1327_cast_fp16, y = var_8353_to_fp16)[name = tensor("aw_chunk_1327_cast_fp16")]; tensor var_8355_to_fp16 = const()[name = tensor("op_8355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1329_cast_fp16, y = var_8355_to_fp16)[name = tensor("aw_chunk_1329_cast_fp16")]; tensor var_8357_to_fp16 = const()[name = tensor("op_8357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1331_cast_fp16, y = var_8357_to_fp16)[name = tensor("aw_chunk_1331_cast_fp16")]; tensor var_8359_to_fp16 = const()[name = tensor("op_8359_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1333_cast_fp16, y = var_8359_to_fp16)[name = tensor("aw_chunk_1333_cast_fp16")]; tensor var_8361_to_fp16 = const()[name = tensor("op_8361_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1335_cast_fp16, y = var_8361_to_fp16)[name = tensor("aw_chunk_1335_cast_fp16")]; tensor var_8363_to_fp16 = const()[name = tensor("op_8363_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1337_cast_fp16, y = var_8363_to_fp16)[name = tensor("aw_chunk_1337_cast_fp16")]; tensor var_8365_to_fp16 = const()[name = tensor("op_8365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1339_cast_fp16, y = var_8365_to_fp16)[name = tensor("aw_chunk_1339_cast_fp16")]; tensor var_8367_to_fp16 = const()[name = tensor("op_8367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1341_cast_fp16, y = var_8367_to_fp16)[name = tensor("aw_chunk_1341_cast_fp16")]; tensor var_8369_to_fp16 = const()[name = tensor("op_8369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1343_cast_fp16, y = var_8369_to_fp16)[name = tensor("aw_chunk_1343_cast_fp16")]; tensor var_8371_to_fp16 = const()[name = tensor("op_8371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1345_cast_fp16, y = var_8371_to_fp16)[name = tensor("aw_chunk_1345_cast_fp16")]; tensor var_8373_to_fp16 = const()[name = tensor("op_8373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1347_cast_fp16, y = var_8373_to_fp16)[name = tensor("aw_chunk_1347_cast_fp16")]; tensor var_8375_to_fp16 = const()[name = tensor("op_8375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1349_cast_fp16, y = var_8375_to_fp16)[name = tensor("aw_chunk_1349_cast_fp16")]; tensor var_8377_to_fp16 = const()[name = tensor("op_8377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1351_cast_fp16, y = var_8377_to_fp16)[name = tensor("aw_chunk_1351_cast_fp16")]; tensor var_8379_to_fp16 = const()[name = tensor("op_8379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1353_cast_fp16, y = var_8379_to_fp16)[name = tensor("aw_chunk_1353_cast_fp16")]; tensor var_8381_to_fp16 = const()[name = tensor("op_8381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1355_cast_fp16, y = var_8381_to_fp16)[name = tensor("aw_chunk_1355_cast_fp16")]; tensor var_8383_to_fp16 = const()[name = tensor("op_8383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1357_cast_fp16, y = var_8383_to_fp16)[name = tensor("aw_chunk_1357_cast_fp16")]; tensor var_8385_to_fp16 = const()[name = tensor("op_8385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1359_cast_fp16, y = var_8385_to_fp16)[name = tensor("aw_chunk_1359_cast_fp16")]; tensor var_8387_to_fp16 = const()[name = tensor("op_8387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1361_cast_fp16, y = var_8387_to_fp16)[name = tensor("aw_chunk_1361_cast_fp16")]; tensor var_8389_to_fp16 = const()[name = tensor("op_8389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1363_cast_fp16, y = var_8389_to_fp16)[name = tensor("aw_chunk_1363_cast_fp16")]; tensor var_8391_to_fp16 = const()[name = tensor("op_8391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1365_cast_fp16, y = var_8391_to_fp16)[name = tensor("aw_chunk_1365_cast_fp16")]; tensor var_8393_to_fp16 = const()[name = tensor("op_8393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1367_cast_fp16, y = var_8393_to_fp16)[name = tensor("aw_chunk_1367_cast_fp16")]; tensor var_8395_to_fp16 = const()[name = tensor("op_8395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1369_cast_fp16, y = var_8395_to_fp16)[name = tensor("aw_chunk_1369_cast_fp16")]; tensor var_8397_to_fp16 = const()[name = tensor("op_8397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1371_cast_fp16, y = var_8397_to_fp16)[name = tensor("aw_chunk_1371_cast_fp16")]; tensor var_8399_to_fp16 = const()[name = tensor("op_8399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1373_cast_fp16, y = var_8399_to_fp16)[name = tensor("aw_chunk_1373_cast_fp16")]; tensor var_8401_to_fp16 = const()[name = tensor("op_8401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1375_cast_fp16, y = var_8401_to_fp16)[name = tensor("aw_chunk_1375_cast_fp16")]; tensor var_8403_to_fp16 = const()[name = tensor("op_8403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1377_cast_fp16, y = var_8403_to_fp16)[name = tensor("aw_chunk_1377_cast_fp16")]; tensor var_8405_to_fp16 = const()[name = tensor("op_8405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1379_cast_fp16, y = var_8405_to_fp16)[name = tensor("aw_chunk_1379_cast_fp16")]; tensor var_8407_to_fp16 = const()[name = tensor("op_8407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1381_cast_fp16, y = var_8407_to_fp16)[name = tensor("aw_chunk_1381_cast_fp16")]; tensor var_8409_to_fp16 = const()[name = tensor("op_8409_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1383_cast_fp16, y = var_8409_to_fp16)[name = tensor("aw_chunk_1383_cast_fp16")]; tensor var_8411_to_fp16 = const()[name = tensor("op_8411_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1385_cast_fp16, y = var_8411_to_fp16)[name = tensor("aw_chunk_1385_cast_fp16")]; tensor var_8413_to_fp16 = const()[name = tensor("op_8413_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1387_cast_fp16, y = var_8413_to_fp16)[name = tensor("aw_chunk_1387_cast_fp16")]; tensor var_8415_to_fp16 = const()[name = tensor("op_8415_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1389_cast_fp16, y = var_8415_to_fp16)[name = tensor("aw_chunk_1389_cast_fp16")]; tensor var_8417_to_fp16 = const()[name = tensor("op_8417_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1391_cast_fp16, y = var_8417_to_fp16)[name = tensor("aw_chunk_1391_cast_fp16")]; tensor var_8419_to_fp16 = const()[name = tensor("op_8419_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1393_cast_fp16, y = var_8419_to_fp16)[name = tensor("aw_chunk_1393_cast_fp16")]; tensor var_8421_to_fp16 = const()[name = tensor("op_8421_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1395_cast_fp16, y = var_8421_to_fp16)[name = tensor("aw_chunk_1395_cast_fp16")]; tensor var_8423_to_fp16 = const()[name = tensor("op_8423_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1397_cast_fp16, y = var_8423_to_fp16)[name = tensor("aw_chunk_1397_cast_fp16")]; tensor var_8425_to_fp16 = const()[name = tensor("op_8425_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1399_cast_fp16, y = var_8425_to_fp16)[name = tensor("aw_chunk_1399_cast_fp16")]; tensor var_8427_to_fp16 = const()[name = tensor("op_8427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1401_cast_fp16, y = var_8427_to_fp16)[name = tensor("aw_chunk_1401_cast_fp16")]; tensor var_8429_to_fp16 = const()[name = tensor("op_8429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1403_cast_fp16, y = var_8429_to_fp16)[name = tensor("aw_chunk_1403_cast_fp16")]; tensor var_8431_to_fp16 = const()[name = tensor("op_8431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1405_cast_fp16, y = var_8431_to_fp16)[name = tensor("aw_chunk_1405_cast_fp16")]; tensor var_8433_to_fp16 = const()[name = tensor("op_8433_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1407_cast_fp16, y = var_8433_to_fp16)[name = tensor("aw_chunk_1407_cast_fp16")]; tensor var_8435_to_fp16 = const()[name = tensor("op_8435_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1409_cast_fp16, y = var_8435_to_fp16)[name = tensor("aw_chunk_1409_cast_fp16")]; tensor var_8437_to_fp16 = const()[name = tensor("op_8437_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1411_cast_fp16, y = var_8437_to_fp16)[name = tensor("aw_chunk_1411_cast_fp16")]; tensor var_8439_to_fp16 = const()[name = tensor("op_8439_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1413_cast_fp16, y = var_8439_to_fp16)[name = tensor("aw_chunk_1413_cast_fp16")]; tensor var_8441_to_fp16 = const()[name = tensor("op_8441_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1415_cast_fp16, y = var_8441_to_fp16)[name = tensor("aw_chunk_1415_cast_fp16")]; tensor var_8443_to_fp16 = const()[name = tensor("op_8443_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1417_cast_fp16, y = var_8443_to_fp16)[name = tensor("aw_chunk_1417_cast_fp16")]; tensor var_8445_to_fp16 = const()[name = tensor("op_8445_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1419_cast_fp16, y = var_8445_to_fp16)[name = tensor("aw_chunk_1419_cast_fp16")]; tensor var_8447_to_fp16 = const()[name = tensor("op_8447_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1421_cast_fp16, y = var_8447_to_fp16)[name = tensor("aw_chunk_1421_cast_fp16")]; tensor var_8449_to_fp16 = const()[name = tensor("op_8449_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1423_cast_fp16, y = var_8449_to_fp16)[name = tensor("aw_chunk_1423_cast_fp16")]; tensor var_8451_to_fp16 = const()[name = tensor("op_8451_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1425_cast_fp16, y = var_8451_to_fp16)[name = tensor("aw_chunk_1425_cast_fp16")]; tensor var_8453_to_fp16 = const()[name = tensor("op_8453_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1427_cast_fp16, y = var_8453_to_fp16)[name = tensor("aw_chunk_1427_cast_fp16")]; tensor var_8455_to_fp16 = const()[name = tensor("op_8455_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1429_cast_fp16, y = var_8455_to_fp16)[name = tensor("aw_chunk_1429_cast_fp16")]; tensor var_8457_to_fp16 = const()[name = tensor("op_8457_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1431_cast_fp16, y = var_8457_to_fp16)[name = tensor("aw_chunk_1431_cast_fp16")]; tensor var_8459_to_fp16 = const()[name = tensor("op_8459_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1433_cast_fp16, y = var_8459_to_fp16)[name = tensor("aw_chunk_1433_cast_fp16")]; tensor var_8461_to_fp16 = const()[name = tensor("op_8461_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1435_cast_fp16, y = var_8461_to_fp16)[name = tensor("aw_chunk_1435_cast_fp16")]; tensor var_8463_to_fp16 = const()[name = tensor("op_8463_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1437_cast_fp16, y = var_8463_to_fp16)[name = tensor("aw_chunk_1437_cast_fp16")]; tensor var_8465_to_fp16 = const()[name = tensor("op_8465_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1439_cast_fp16, y = var_8465_to_fp16)[name = tensor("aw_chunk_1439_cast_fp16")]; tensor var_8467_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1297_cast_fp16)[name = tensor("op_8467_cast_fp16")]; tensor var_8468_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1299_cast_fp16)[name = tensor("op_8468_cast_fp16")]; tensor var_8469_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1301_cast_fp16)[name = tensor("op_8469_cast_fp16")]; tensor var_8470_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1303_cast_fp16)[name = tensor("op_8470_cast_fp16")]; tensor var_8471_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1305_cast_fp16)[name = tensor("op_8471_cast_fp16")]; tensor var_8472_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1307_cast_fp16)[name = tensor("op_8472_cast_fp16")]; tensor var_8473_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1309_cast_fp16)[name = tensor("op_8473_cast_fp16")]; tensor var_8474_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1311_cast_fp16)[name = tensor("op_8474_cast_fp16")]; tensor var_8475_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1313_cast_fp16)[name = tensor("op_8475_cast_fp16")]; tensor var_8476_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1315_cast_fp16)[name = tensor("op_8476_cast_fp16")]; tensor var_8477_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1317_cast_fp16)[name = tensor("op_8477_cast_fp16")]; tensor var_8478_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1319_cast_fp16)[name = tensor("op_8478_cast_fp16")]; tensor var_8479_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1321_cast_fp16)[name = tensor("op_8479_cast_fp16")]; tensor var_8480_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1323_cast_fp16)[name = tensor("op_8480_cast_fp16")]; tensor var_8481_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1325_cast_fp16)[name = tensor("op_8481_cast_fp16")]; tensor var_8482_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1327_cast_fp16)[name = tensor("op_8482_cast_fp16")]; tensor var_8483_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1329_cast_fp16)[name = tensor("op_8483_cast_fp16")]; tensor var_8484_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1331_cast_fp16)[name = tensor("op_8484_cast_fp16")]; tensor var_8485_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1333_cast_fp16)[name = tensor("op_8485_cast_fp16")]; tensor var_8486_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1335_cast_fp16)[name = tensor("op_8486_cast_fp16")]; tensor var_8487_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1337_cast_fp16)[name = tensor("op_8487_cast_fp16")]; tensor var_8488_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1339_cast_fp16)[name = tensor("op_8488_cast_fp16")]; tensor var_8489_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1341_cast_fp16)[name = tensor("op_8489_cast_fp16")]; tensor var_8490_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1343_cast_fp16)[name = tensor("op_8490_cast_fp16")]; tensor var_8491_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1345_cast_fp16)[name = tensor("op_8491_cast_fp16")]; tensor var_8492_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1347_cast_fp16)[name = tensor("op_8492_cast_fp16")]; tensor var_8493_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1349_cast_fp16)[name = tensor("op_8493_cast_fp16")]; tensor var_8494_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1351_cast_fp16)[name = tensor("op_8494_cast_fp16")]; tensor var_8495_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1353_cast_fp16)[name = tensor("op_8495_cast_fp16")]; tensor var_8496_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1355_cast_fp16)[name = tensor("op_8496_cast_fp16")]; tensor var_8497_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1357_cast_fp16)[name = tensor("op_8497_cast_fp16")]; tensor var_8498_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1359_cast_fp16)[name = tensor("op_8498_cast_fp16")]; tensor var_8499_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1361_cast_fp16)[name = tensor("op_8499_cast_fp16")]; tensor var_8500_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1363_cast_fp16)[name = tensor("op_8500_cast_fp16")]; tensor var_8501_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1365_cast_fp16)[name = tensor("op_8501_cast_fp16")]; tensor var_8502_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1367_cast_fp16)[name = tensor("op_8502_cast_fp16")]; tensor var_8503_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1369_cast_fp16)[name = tensor("op_8503_cast_fp16")]; tensor var_8504_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1371_cast_fp16)[name = tensor("op_8504_cast_fp16")]; tensor var_8505_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1373_cast_fp16)[name = tensor("op_8505_cast_fp16")]; tensor var_8506_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1375_cast_fp16)[name = tensor("op_8506_cast_fp16")]; tensor var_8507_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1377_cast_fp16)[name = tensor("op_8507_cast_fp16")]; tensor var_8508_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1379_cast_fp16)[name = tensor("op_8508_cast_fp16")]; tensor var_8509_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1381_cast_fp16)[name = tensor("op_8509_cast_fp16")]; tensor var_8510_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1383_cast_fp16)[name = tensor("op_8510_cast_fp16")]; tensor var_8511_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1385_cast_fp16)[name = tensor("op_8511_cast_fp16")]; tensor var_8512_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1387_cast_fp16)[name = tensor("op_8512_cast_fp16")]; tensor var_8513_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1389_cast_fp16)[name = tensor("op_8513_cast_fp16")]; tensor var_8514_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1391_cast_fp16)[name = tensor("op_8514_cast_fp16")]; tensor var_8515_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1393_cast_fp16)[name = tensor("op_8515_cast_fp16")]; tensor var_8516_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1395_cast_fp16)[name = tensor("op_8516_cast_fp16")]; tensor var_8517_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1397_cast_fp16)[name = tensor("op_8517_cast_fp16")]; tensor var_8518_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1399_cast_fp16)[name = tensor("op_8518_cast_fp16")]; tensor var_8519_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1401_cast_fp16)[name = tensor("op_8519_cast_fp16")]; tensor var_8520_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1403_cast_fp16)[name = tensor("op_8520_cast_fp16")]; tensor var_8521_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1405_cast_fp16)[name = tensor("op_8521_cast_fp16")]; tensor var_8522_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1407_cast_fp16)[name = tensor("op_8522_cast_fp16")]; tensor var_8523_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1409_cast_fp16)[name = tensor("op_8523_cast_fp16")]; tensor var_8524_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1411_cast_fp16)[name = tensor("op_8524_cast_fp16")]; tensor var_8525_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1413_cast_fp16)[name = tensor("op_8525_cast_fp16")]; tensor var_8526_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1415_cast_fp16)[name = tensor("op_8526_cast_fp16")]; tensor var_8527_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1417_cast_fp16)[name = tensor("op_8527_cast_fp16")]; tensor var_8528_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1419_cast_fp16)[name = tensor("op_8528_cast_fp16")]; tensor var_8529_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1421_cast_fp16)[name = tensor("op_8529_cast_fp16")]; tensor var_8530_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1423_cast_fp16)[name = tensor("op_8530_cast_fp16")]; tensor var_8531_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1425_cast_fp16)[name = tensor("op_8531_cast_fp16")]; tensor var_8532_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1427_cast_fp16)[name = tensor("op_8532_cast_fp16")]; tensor var_8533_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1429_cast_fp16)[name = tensor("op_8533_cast_fp16")]; tensor var_8534_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1431_cast_fp16)[name = tensor("op_8534_cast_fp16")]; tensor var_8535_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1433_cast_fp16)[name = tensor("op_8535_cast_fp16")]; tensor var_8536_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1435_cast_fp16)[name = tensor("op_8536_cast_fp16")]; tensor var_8537_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1437_cast_fp16)[name = tensor("op_8537_cast_fp16")]; tensor var_8538_cast_fp16 = softmax(axis = var_7911, x = aw_chunk_1439_cast_fp16)[name = tensor("op_8538_cast_fp16")]; tensor var_8540_equation_0 = const()[name = tensor("op_8540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8540_cast_fp16 = einsum(equation = var_8540_equation_0, values = (var_8132_cast_fp16, var_8467_cast_fp16))[name = tensor("op_8540_cast_fp16")]; tensor var_8542_equation_0 = const()[name = tensor("op_8542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8542_cast_fp16 = einsum(equation = var_8542_equation_0, values = (var_8132_cast_fp16, var_8468_cast_fp16))[name = tensor("op_8542_cast_fp16")]; tensor var_8544_equation_0 = const()[name = tensor("op_8544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8544_cast_fp16 = einsum(equation = var_8544_equation_0, values = (var_8132_cast_fp16, var_8469_cast_fp16))[name = tensor("op_8544_cast_fp16")]; tensor var_8546_equation_0 = const()[name = tensor("op_8546_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8546_cast_fp16 = einsum(equation = var_8546_equation_0, values = (var_8132_cast_fp16, var_8470_cast_fp16))[name = tensor("op_8546_cast_fp16")]; tensor var_8548_equation_0 = const()[name = tensor("op_8548_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8548_cast_fp16 = einsum(equation = var_8548_equation_0, values = (var_8132_cast_fp16, var_8471_cast_fp16))[name = tensor("op_8548_cast_fp16")]; tensor var_8550_equation_0 = const()[name = tensor("op_8550_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8550_cast_fp16 = einsum(equation = var_8550_equation_0, values = (var_8132_cast_fp16, var_8472_cast_fp16))[name = tensor("op_8550_cast_fp16")]; tensor var_8552_equation_0 = const()[name = tensor("op_8552_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8552_cast_fp16 = einsum(equation = var_8552_equation_0, values = (var_8136_cast_fp16, var_8473_cast_fp16))[name = tensor("op_8552_cast_fp16")]; tensor var_8554_equation_0 = const()[name = tensor("op_8554_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8554_cast_fp16 = einsum(equation = var_8554_equation_0, values = (var_8136_cast_fp16, var_8474_cast_fp16))[name = tensor("op_8554_cast_fp16")]; tensor var_8556_equation_0 = const()[name = tensor("op_8556_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8556_cast_fp16 = einsum(equation = var_8556_equation_0, values = (var_8136_cast_fp16, var_8475_cast_fp16))[name = tensor("op_8556_cast_fp16")]; tensor var_8558_equation_0 = const()[name = tensor("op_8558_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8558_cast_fp16 = einsum(equation = var_8558_equation_0, values = (var_8136_cast_fp16, var_8476_cast_fp16))[name = tensor("op_8558_cast_fp16")]; tensor var_8560_equation_0 = const()[name = tensor("op_8560_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8560_cast_fp16 = einsum(equation = var_8560_equation_0, values = (var_8136_cast_fp16, var_8477_cast_fp16))[name = tensor("op_8560_cast_fp16")]; tensor var_8562_equation_0 = const()[name = tensor("op_8562_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8562_cast_fp16 = einsum(equation = var_8562_equation_0, values = (var_8136_cast_fp16, var_8478_cast_fp16))[name = tensor("op_8562_cast_fp16")]; tensor var_8564_equation_0 = const()[name = tensor("op_8564_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8564_cast_fp16 = einsum(equation = var_8564_equation_0, values = (var_8140_cast_fp16, var_8479_cast_fp16))[name = tensor("op_8564_cast_fp16")]; tensor var_8566_equation_0 = const()[name = tensor("op_8566_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8566_cast_fp16 = einsum(equation = var_8566_equation_0, values = (var_8140_cast_fp16, var_8480_cast_fp16))[name = tensor("op_8566_cast_fp16")]; tensor var_8568_equation_0 = const()[name = tensor("op_8568_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8568_cast_fp16 = einsum(equation = var_8568_equation_0, values = (var_8140_cast_fp16, var_8481_cast_fp16))[name = tensor("op_8568_cast_fp16")]; tensor var_8570_equation_0 = const()[name = tensor("op_8570_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8570_cast_fp16 = einsum(equation = var_8570_equation_0, values = (var_8140_cast_fp16, var_8482_cast_fp16))[name = tensor("op_8570_cast_fp16")]; tensor var_8572_equation_0 = const()[name = tensor("op_8572_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8572_cast_fp16 = einsum(equation = var_8572_equation_0, values = (var_8140_cast_fp16, var_8483_cast_fp16))[name = tensor("op_8572_cast_fp16")]; tensor var_8574_equation_0 = const()[name = tensor("op_8574_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8574_cast_fp16 = einsum(equation = var_8574_equation_0, values = (var_8140_cast_fp16, var_8484_cast_fp16))[name = tensor("op_8574_cast_fp16")]; tensor var_8576_equation_0 = const()[name = tensor("op_8576_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8576_cast_fp16 = einsum(equation = var_8576_equation_0, values = (var_8144_cast_fp16, var_8485_cast_fp16))[name = tensor("op_8576_cast_fp16")]; tensor var_8578_equation_0 = const()[name = tensor("op_8578_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8578_cast_fp16 = einsum(equation = var_8578_equation_0, values = (var_8144_cast_fp16, var_8486_cast_fp16))[name = tensor("op_8578_cast_fp16")]; tensor var_8580_equation_0 = const()[name = tensor("op_8580_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8580_cast_fp16 = einsum(equation = var_8580_equation_0, values = (var_8144_cast_fp16, var_8487_cast_fp16))[name = tensor("op_8580_cast_fp16")]; tensor var_8582_equation_0 = const()[name = tensor("op_8582_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8582_cast_fp16 = einsum(equation = var_8582_equation_0, values = (var_8144_cast_fp16, var_8488_cast_fp16))[name = tensor("op_8582_cast_fp16")]; tensor var_8584_equation_0 = const()[name = tensor("op_8584_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8584_cast_fp16 = einsum(equation = var_8584_equation_0, values = (var_8144_cast_fp16, var_8489_cast_fp16))[name = tensor("op_8584_cast_fp16")]; tensor var_8586_equation_0 = const()[name = tensor("op_8586_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8586_cast_fp16 = einsum(equation = var_8586_equation_0, values = (var_8144_cast_fp16, var_8490_cast_fp16))[name = tensor("op_8586_cast_fp16")]; tensor var_8588_equation_0 = const()[name = tensor("op_8588_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8588_cast_fp16 = einsum(equation = var_8588_equation_0, values = (var_8148_cast_fp16, var_8491_cast_fp16))[name = tensor("op_8588_cast_fp16")]; tensor var_8590_equation_0 = const()[name = tensor("op_8590_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8590_cast_fp16 = einsum(equation = var_8590_equation_0, values = (var_8148_cast_fp16, var_8492_cast_fp16))[name = tensor("op_8590_cast_fp16")]; tensor var_8592_equation_0 = const()[name = tensor("op_8592_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8592_cast_fp16 = einsum(equation = var_8592_equation_0, values = (var_8148_cast_fp16, var_8493_cast_fp16))[name = tensor("op_8592_cast_fp16")]; tensor var_8594_equation_0 = const()[name = tensor("op_8594_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8594_cast_fp16 = einsum(equation = var_8594_equation_0, values = (var_8148_cast_fp16, var_8494_cast_fp16))[name = tensor("op_8594_cast_fp16")]; tensor var_8596_equation_0 = const()[name = tensor("op_8596_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8596_cast_fp16 = einsum(equation = var_8596_equation_0, values = (var_8148_cast_fp16, var_8495_cast_fp16))[name = tensor("op_8596_cast_fp16")]; tensor var_8598_equation_0 = const()[name = tensor("op_8598_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8598_cast_fp16 = einsum(equation = var_8598_equation_0, values = (var_8148_cast_fp16, var_8496_cast_fp16))[name = tensor("op_8598_cast_fp16")]; tensor var_8600_equation_0 = const()[name = tensor("op_8600_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8600_cast_fp16 = einsum(equation = var_8600_equation_0, values = (var_8152_cast_fp16, var_8497_cast_fp16))[name = tensor("op_8600_cast_fp16")]; tensor var_8602_equation_0 = const()[name = tensor("op_8602_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8602_cast_fp16 = einsum(equation = var_8602_equation_0, values = (var_8152_cast_fp16, var_8498_cast_fp16))[name = tensor("op_8602_cast_fp16")]; tensor var_8604_equation_0 = const()[name = tensor("op_8604_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8604_cast_fp16 = einsum(equation = var_8604_equation_0, values = (var_8152_cast_fp16, var_8499_cast_fp16))[name = tensor("op_8604_cast_fp16")]; tensor var_8606_equation_0 = const()[name = tensor("op_8606_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8606_cast_fp16 = einsum(equation = var_8606_equation_0, values = (var_8152_cast_fp16, var_8500_cast_fp16))[name = tensor("op_8606_cast_fp16")]; tensor var_8608_equation_0 = const()[name = tensor("op_8608_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8608_cast_fp16 = einsum(equation = var_8608_equation_0, values = (var_8152_cast_fp16, var_8501_cast_fp16))[name = tensor("op_8608_cast_fp16")]; tensor var_8610_equation_0 = const()[name = tensor("op_8610_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8610_cast_fp16 = einsum(equation = var_8610_equation_0, values = (var_8152_cast_fp16, var_8502_cast_fp16))[name = tensor("op_8610_cast_fp16")]; tensor var_8612_equation_0 = const()[name = tensor("op_8612_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8612_cast_fp16 = einsum(equation = var_8612_equation_0, values = (var_8156_cast_fp16, var_8503_cast_fp16))[name = tensor("op_8612_cast_fp16")]; tensor var_8614_equation_0 = const()[name = tensor("op_8614_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8614_cast_fp16 = einsum(equation = var_8614_equation_0, values = (var_8156_cast_fp16, var_8504_cast_fp16))[name = tensor("op_8614_cast_fp16")]; tensor var_8616_equation_0 = const()[name = tensor("op_8616_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8616_cast_fp16 = einsum(equation = var_8616_equation_0, values = (var_8156_cast_fp16, var_8505_cast_fp16))[name = tensor("op_8616_cast_fp16")]; tensor var_8618_equation_0 = const()[name = tensor("op_8618_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8618_cast_fp16 = einsum(equation = var_8618_equation_0, values = (var_8156_cast_fp16, var_8506_cast_fp16))[name = tensor("op_8618_cast_fp16")]; tensor var_8620_equation_0 = const()[name = tensor("op_8620_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8620_cast_fp16 = einsum(equation = var_8620_equation_0, values = (var_8156_cast_fp16, var_8507_cast_fp16))[name = tensor("op_8620_cast_fp16")]; tensor var_8622_equation_0 = const()[name = tensor("op_8622_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8622_cast_fp16 = einsum(equation = var_8622_equation_0, values = (var_8156_cast_fp16, var_8508_cast_fp16))[name = tensor("op_8622_cast_fp16")]; tensor var_8624_equation_0 = const()[name = tensor("op_8624_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8624_cast_fp16 = einsum(equation = var_8624_equation_0, values = (var_8160_cast_fp16, var_8509_cast_fp16))[name = tensor("op_8624_cast_fp16")]; tensor var_8626_equation_0 = const()[name = tensor("op_8626_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8626_cast_fp16 = einsum(equation = var_8626_equation_0, values = (var_8160_cast_fp16, var_8510_cast_fp16))[name = tensor("op_8626_cast_fp16")]; tensor var_8628_equation_0 = const()[name = tensor("op_8628_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8628_cast_fp16 = einsum(equation = var_8628_equation_0, values = (var_8160_cast_fp16, var_8511_cast_fp16))[name = tensor("op_8628_cast_fp16")]; tensor var_8630_equation_0 = const()[name = tensor("op_8630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8630_cast_fp16 = einsum(equation = var_8630_equation_0, values = (var_8160_cast_fp16, var_8512_cast_fp16))[name = tensor("op_8630_cast_fp16")]; tensor var_8632_equation_0 = const()[name = tensor("op_8632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8632_cast_fp16 = einsum(equation = var_8632_equation_0, values = (var_8160_cast_fp16, var_8513_cast_fp16))[name = tensor("op_8632_cast_fp16")]; tensor var_8634_equation_0 = const()[name = tensor("op_8634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8634_cast_fp16 = einsum(equation = var_8634_equation_0, values = (var_8160_cast_fp16, var_8514_cast_fp16))[name = tensor("op_8634_cast_fp16")]; tensor var_8636_equation_0 = const()[name = tensor("op_8636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8636_cast_fp16 = einsum(equation = var_8636_equation_0, values = (var_8164_cast_fp16, var_8515_cast_fp16))[name = tensor("op_8636_cast_fp16")]; tensor var_8638_equation_0 = const()[name = tensor("op_8638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8638_cast_fp16 = einsum(equation = var_8638_equation_0, values = (var_8164_cast_fp16, var_8516_cast_fp16))[name = tensor("op_8638_cast_fp16")]; tensor var_8640_equation_0 = const()[name = tensor("op_8640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8640_cast_fp16 = einsum(equation = var_8640_equation_0, values = (var_8164_cast_fp16, var_8517_cast_fp16))[name = tensor("op_8640_cast_fp16")]; tensor var_8642_equation_0 = const()[name = tensor("op_8642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8642_cast_fp16 = einsum(equation = var_8642_equation_0, values = (var_8164_cast_fp16, var_8518_cast_fp16))[name = tensor("op_8642_cast_fp16")]; tensor var_8644_equation_0 = const()[name = tensor("op_8644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8644_cast_fp16 = einsum(equation = var_8644_equation_0, values = (var_8164_cast_fp16, var_8519_cast_fp16))[name = tensor("op_8644_cast_fp16")]; tensor var_8646_equation_0 = const()[name = tensor("op_8646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8646_cast_fp16 = einsum(equation = var_8646_equation_0, values = (var_8164_cast_fp16, var_8520_cast_fp16))[name = tensor("op_8646_cast_fp16")]; tensor var_8648_equation_0 = const()[name = tensor("op_8648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8648_cast_fp16 = einsum(equation = var_8648_equation_0, values = (var_8168_cast_fp16, var_8521_cast_fp16))[name = tensor("op_8648_cast_fp16")]; tensor var_8650_equation_0 = const()[name = tensor("op_8650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8650_cast_fp16 = einsum(equation = var_8650_equation_0, values = (var_8168_cast_fp16, var_8522_cast_fp16))[name = tensor("op_8650_cast_fp16")]; tensor var_8652_equation_0 = const()[name = tensor("op_8652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8652_cast_fp16 = einsum(equation = var_8652_equation_0, values = (var_8168_cast_fp16, var_8523_cast_fp16))[name = tensor("op_8652_cast_fp16")]; tensor var_8654_equation_0 = const()[name = tensor("op_8654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8654_cast_fp16 = einsum(equation = var_8654_equation_0, values = (var_8168_cast_fp16, var_8524_cast_fp16))[name = tensor("op_8654_cast_fp16")]; tensor var_8656_equation_0 = const()[name = tensor("op_8656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8656_cast_fp16 = einsum(equation = var_8656_equation_0, values = (var_8168_cast_fp16, var_8525_cast_fp16))[name = tensor("op_8656_cast_fp16")]; tensor var_8658_equation_0 = const()[name = tensor("op_8658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8658_cast_fp16 = einsum(equation = var_8658_equation_0, values = (var_8168_cast_fp16, var_8526_cast_fp16))[name = tensor("op_8658_cast_fp16")]; tensor var_8660_equation_0 = const()[name = tensor("op_8660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8660_cast_fp16 = einsum(equation = var_8660_equation_0, values = (var_8172_cast_fp16, var_8527_cast_fp16))[name = tensor("op_8660_cast_fp16")]; tensor var_8662_equation_0 = const()[name = tensor("op_8662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8662_cast_fp16 = einsum(equation = var_8662_equation_0, values = (var_8172_cast_fp16, var_8528_cast_fp16))[name = tensor("op_8662_cast_fp16")]; tensor var_8664_equation_0 = const()[name = tensor("op_8664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8664_cast_fp16 = einsum(equation = var_8664_equation_0, values = (var_8172_cast_fp16, var_8529_cast_fp16))[name = tensor("op_8664_cast_fp16")]; tensor var_8666_equation_0 = const()[name = tensor("op_8666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8666_cast_fp16 = einsum(equation = var_8666_equation_0, values = (var_8172_cast_fp16, var_8530_cast_fp16))[name = tensor("op_8666_cast_fp16")]; tensor var_8668_equation_0 = const()[name = tensor("op_8668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8668_cast_fp16 = einsum(equation = var_8668_equation_0, values = (var_8172_cast_fp16, var_8531_cast_fp16))[name = tensor("op_8668_cast_fp16")]; tensor var_8670_equation_0 = const()[name = tensor("op_8670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8670_cast_fp16 = einsum(equation = var_8670_equation_0, values = (var_8172_cast_fp16, var_8532_cast_fp16))[name = tensor("op_8670_cast_fp16")]; tensor var_8672_equation_0 = const()[name = tensor("op_8672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8672_cast_fp16 = einsum(equation = var_8672_equation_0, values = (var_8176_cast_fp16, var_8533_cast_fp16))[name = tensor("op_8672_cast_fp16")]; tensor var_8674_equation_0 = const()[name = tensor("op_8674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8674_cast_fp16 = einsum(equation = var_8674_equation_0, values = (var_8176_cast_fp16, var_8534_cast_fp16))[name = tensor("op_8674_cast_fp16")]; tensor var_8676_equation_0 = const()[name = tensor("op_8676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8676_cast_fp16 = einsum(equation = var_8676_equation_0, values = (var_8176_cast_fp16, var_8535_cast_fp16))[name = tensor("op_8676_cast_fp16")]; tensor var_8678_equation_0 = const()[name = tensor("op_8678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8678_cast_fp16 = einsum(equation = var_8678_equation_0, values = (var_8176_cast_fp16, var_8536_cast_fp16))[name = tensor("op_8678_cast_fp16")]; tensor var_8680_equation_0 = const()[name = tensor("op_8680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8680_cast_fp16 = einsum(equation = var_8680_equation_0, values = (var_8176_cast_fp16, var_8537_cast_fp16))[name = tensor("op_8680_cast_fp16")]; tensor var_8682_equation_0 = const()[name = tensor("op_8682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8682_cast_fp16 = einsum(equation = var_8682_equation_0, values = (var_8176_cast_fp16, var_8538_cast_fp16))[name = tensor("op_8682_cast_fp16")]; tensor var_8684_interleave_0 = const()[name = tensor("op_8684_interleave_0"), val = tensor(false)]; tensor var_8684_cast_fp16 = concat(axis = var_7895, interleave = var_8684_interleave_0, values = (var_8540_cast_fp16, var_8542_cast_fp16, var_8544_cast_fp16, var_8546_cast_fp16, var_8548_cast_fp16, var_8550_cast_fp16))[name = tensor("op_8684_cast_fp16")]; tensor var_8686_interleave_0 = const()[name = tensor("op_8686_interleave_0"), val = tensor(false)]; tensor var_8686_cast_fp16 = concat(axis = var_7895, interleave = var_8686_interleave_0, values = (var_8552_cast_fp16, var_8554_cast_fp16, var_8556_cast_fp16, var_8558_cast_fp16, var_8560_cast_fp16, var_8562_cast_fp16))[name = tensor("op_8686_cast_fp16")]; tensor var_8688_interleave_0 = const()[name = tensor("op_8688_interleave_0"), val = tensor(false)]; tensor var_8688_cast_fp16 = concat(axis = var_7895, interleave = var_8688_interleave_0, values = (var_8564_cast_fp16, var_8566_cast_fp16, var_8568_cast_fp16, var_8570_cast_fp16, var_8572_cast_fp16, var_8574_cast_fp16))[name = tensor("op_8688_cast_fp16")]; tensor var_8690_interleave_0 = const()[name = tensor("op_8690_interleave_0"), val = tensor(false)]; tensor var_8690_cast_fp16 = concat(axis = var_7895, interleave = var_8690_interleave_0, values = (var_8576_cast_fp16, var_8578_cast_fp16, var_8580_cast_fp16, var_8582_cast_fp16, var_8584_cast_fp16, var_8586_cast_fp16))[name = tensor("op_8690_cast_fp16")]; tensor var_8692_interleave_0 = const()[name = tensor("op_8692_interleave_0"), val = tensor(false)]; tensor var_8692_cast_fp16 = concat(axis = var_7895, interleave = var_8692_interleave_0, values = (var_8588_cast_fp16, var_8590_cast_fp16, var_8592_cast_fp16, var_8594_cast_fp16, var_8596_cast_fp16, var_8598_cast_fp16))[name = tensor("op_8692_cast_fp16")]; tensor var_8694_interleave_0 = const()[name = tensor("op_8694_interleave_0"), val = tensor(false)]; tensor var_8694_cast_fp16 = concat(axis = var_7895, interleave = var_8694_interleave_0, values = (var_8600_cast_fp16, var_8602_cast_fp16, var_8604_cast_fp16, var_8606_cast_fp16, var_8608_cast_fp16, var_8610_cast_fp16))[name = tensor("op_8694_cast_fp16")]; tensor var_8696_interleave_0 = const()[name = tensor("op_8696_interleave_0"), val = tensor(false)]; tensor var_8696_cast_fp16 = concat(axis = var_7895, interleave = var_8696_interleave_0, values = (var_8612_cast_fp16, var_8614_cast_fp16, var_8616_cast_fp16, var_8618_cast_fp16, var_8620_cast_fp16, var_8622_cast_fp16))[name = tensor("op_8696_cast_fp16")]; tensor var_8698_interleave_0 = const()[name = tensor("op_8698_interleave_0"), val = tensor(false)]; tensor var_8698_cast_fp16 = concat(axis = var_7895, interleave = var_8698_interleave_0, values = (var_8624_cast_fp16, var_8626_cast_fp16, var_8628_cast_fp16, var_8630_cast_fp16, var_8632_cast_fp16, var_8634_cast_fp16))[name = tensor("op_8698_cast_fp16")]; tensor var_8700_interleave_0 = const()[name = tensor("op_8700_interleave_0"), val = tensor(false)]; tensor var_8700_cast_fp16 = concat(axis = var_7895, interleave = var_8700_interleave_0, values = (var_8636_cast_fp16, var_8638_cast_fp16, var_8640_cast_fp16, var_8642_cast_fp16, var_8644_cast_fp16, var_8646_cast_fp16))[name = tensor("op_8700_cast_fp16")]; tensor var_8702_interleave_0 = const()[name = tensor("op_8702_interleave_0"), val = tensor(false)]; tensor var_8702_cast_fp16 = concat(axis = var_7895, interleave = var_8702_interleave_0, values = (var_8648_cast_fp16, var_8650_cast_fp16, var_8652_cast_fp16, var_8654_cast_fp16, var_8656_cast_fp16, var_8658_cast_fp16))[name = tensor("op_8702_cast_fp16")]; tensor var_8704_interleave_0 = const()[name = tensor("op_8704_interleave_0"), val = tensor(false)]; tensor var_8704_cast_fp16 = concat(axis = var_7895, interleave = var_8704_interleave_0, values = (var_8660_cast_fp16, var_8662_cast_fp16, var_8664_cast_fp16, var_8666_cast_fp16, var_8668_cast_fp16, var_8670_cast_fp16))[name = tensor("op_8704_cast_fp16")]; tensor var_8706_interleave_0 = const()[name = tensor("op_8706_interleave_0"), val = tensor(false)]; tensor var_8706_cast_fp16 = concat(axis = var_7895, interleave = var_8706_interleave_0, values = (var_8672_cast_fp16, var_8674_cast_fp16, var_8676_cast_fp16, var_8678_cast_fp16, var_8680_cast_fp16, var_8682_cast_fp16))[name = tensor("op_8706_cast_fp16")]; tensor input_73_interleave_0 = const()[name = tensor("input_73_interleave_0"), val = tensor(false)]; tensor input_73_cast_fp16 = concat(axis = var_7911, interleave = input_73_interleave_0, values = (var_8684_cast_fp16, var_8686_cast_fp16, var_8688_cast_fp16, var_8690_cast_fp16, var_8692_cast_fp16, var_8694_cast_fp16, var_8696_cast_fp16, var_8698_cast_fp16, var_8700_cast_fp16, var_8702_cast_fp16, var_8704_cast_fp16, var_8706_cast_fp16))[name = tensor("input_73_cast_fp16")]; tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("valid")]; tensor obj_39_strides_0 = const()[name = tensor("obj_39_strides_0"), val = tensor([1, 1])]; tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_39_dilations_0 = const()[name = tensor("obj_39_dilations_0"), val = tensor([1, 1])]; tensor obj_39_groups_0 = const()[name = tensor("obj_39_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137340288)))]; tensor layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138520000)))]; tensor obj_39_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = obj_39_dilations_0, groups = obj_39_groups_0, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = obj_39_strides_0, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_39_cast_fp16")]; tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; tensor var_8725_to_fp16 = const()[name = tensor("op_8725_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_8725_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138521600)))]; tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138523200)))]; tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; tensor input_77_pad_type_0 = const()[name = tensor("input_77_pad_type_0"), val = tensor("valid")]; tensor input_77_strides_0 = const()[name = tensor("input_77_strides_0"), val = tensor([1, 1])]; tensor input_77_pad_0 = const()[name = tensor("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_77_dilations_0 = const()[name = tensor("input_77_dilations_0"), val = tensor([1, 1])]; tensor input_77_groups_0 = const()[name = tensor("input_77_groups_0"), val = tensor(1)]; tensor layers_9_fc1_weight_to_fp16 = const()[name = tensor("layers_9_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138524800)))]; tensor layers_9_fc1_bias_to_fp16 = const()[name = tensor("layers_9_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143243456)))]; tensor input_77_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = input_77_dilations_0, groups = input_77_groups_0, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = input_77_strides_0, weight = layers_9_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor hidden_states_23_pad_type_0 = const()[name = tensor("hidden_states_23_pad_type_0"), val = tensor("valid")]; tensor hidden_states_23_strides_0 = const()[name = tensor("hidden_states_23_strides_0"), val = tensor([1, 1])]; tensor hidden_states_23_pad_0 = const()[name = tensor("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_23_dilations_0 = const()[name = tensor("hidden_states_23_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_23_groups_0 = const()[name = tensor("hidden_states_23_groups_0"), val = tensor(1)]; tensor layers_9_fc2_weight_to_fp16 = const()[name = tensor("layers_9_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143249664)))]; tensor layers_9_fc2_bias_to_fp16 = const()[name = tensor("layers_9_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147968320)))]; tensor hidden_states_23_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = layers_9_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; tensor var_8757 = const()[name = tensor("op_8757"), val = tensor(3)]; tensor var_8773 = const()[name = tensor("op_8773"), val = tensor(1)]; tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; tensor var_8790_to_fp16 = const()[name = tensor("op_8790_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_8790_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147969920)))]; tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147971520)))]; tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("valid")]; tensor query_21_strides_0 = const()[name = tensor("query_21_strides_0"), val = tensor([1, 1])]; tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_21_dilations_0 = const()[name = tensor("query_21_dilations_0"), val = tensor([1, 1])]; tensor query_21_groups_0 = const()[name = tensor("query_21_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147973120)))]; tensor layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149152832)))]; tensor query_21_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = query_21_dilations_0, groups = query_21_groups_0, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = query_21_strides_0, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("query_21_cast_fp16")]; tensor key_21_pad_type_0 = const()[name = tensor("key_21_pad_type_0"), val = tensor("valid")]; tensor key_21_strides_0 = const()[name = tensor("key_21_strides_0"), val = tensor([1, 1])]; tensor key_21_pad_0 = const()[name = tensor("key_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_21_dilations_0 = const()[name = tensor("key_21_dilations_0"), val = tensor([1, 1])]; tensor key_21_groups_0 = const()[name = tensor("key_21_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149154432)))]; tensor key_21_cast_fp16 = conv(dilations = key_21_dilations_0, groups = key_21_groups_0, pad = key_21_pad_0, pad_type = key_21_pad_type_0, strides = key_21_strides_0, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("key_21_cast_fp16")]; tensor value_21_pad_type_0 = const()[name = tensor("value_21_pad_type_0"), val = tensor("valid")]; tensor value_21_strides_0 = const()[name = tensor("value_21_strides_0"), val = tensor([1, 1])]; tensor value_21_pad_0 = const()[name = tensor("value_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_21_dilations_0 = const()[name = tensor("value_21_dilations_0"), val = tensor([1, 1])]; tensor value_21_groups_0 = const()[name = tensor("value_21_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150334144)))]; tensor layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151513856)))]; tensor value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = value_21_dilations_0, groups = value_21_groups_0, pad = value_21_pad_0, pad_type = value_21_pad_type_0, strides = value_21_strides_0, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("value_21_cast_fp16")]; tensor var_8825_begin_0 = const()[name = tensor("op_8825_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8825_end_0 = const()[name = tensor("op_8825_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8825_end_mask_0 = const()[name = tensor("op_8825_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8825_cast_fp16 = slice_by_index(begin = var_8825_begin_0, end = var_8825_end_0, end_mask = var_8825_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8825_cast_fp16")]; tensor var_8829_begin_0 = const()[name = tensor("op_8829_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_8829_end_0 = const()[name = tensor("op_8829_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_8829_end_mask_0 = const()[name = tensor("op_8829_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8829_cast_fp16 = slice_by_index(begin = var_8829_begin_0, end = var_8829_end_0, end_mask = var_8829_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8829_cast_fp16")]; tensor var_8833_begin_0 = const()[name = tensor("op_8833_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_8833_end_0 = const()[name = tensor("op_8833_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_8833_end_mask_0 = const()[name = tensor("op_8833_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8833_cast_fp16 = slice_by_index(begin = var_8833_begin_0, end = var_8833_end_0, end_mask = var_8833_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8833_cast_fp16")]; tensor var_8837_begin_0 = const()[name = tensor("op_8837_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_8837_end_0 = const()[name = tensor("op_8837_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_8837_end_mask_0 = const()[name = tensor("op_8837_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8837_cast_fp16 = slice_by_index(begin = var_8837_begin_0, end = var_8837_end_0, end_mask = var_8837_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8837_cast_fp16")]; tensor var_8841_begin_0 = const()[name = tensor("op_8841_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_8841_end_0 = const()[name = tensor("op_8841_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_8841_end_mask_0 = const()[name = tensor("op_8841_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8841_cast_fp16 = slice_by_index(begin = var_8841_begin_0, end = var_8841_end_0, end_mask = var_8841_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8841_cast_fp16")]; tensor var_8845_begin_0 = const()[name = tensor("op_8845_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8845_end_0 = const()[name = tensor("op_8845_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_8845_end_mask_0 = const()[name = tensor("op_8845_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8845_cast_fp16 = slice_by_index(begin = var_8845_begin_0, end = var_8845_end_0, end_mask = var_8845_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8845_cast_fp16")]; tensor var_8849_begin_0 = const()[name = tensor("op_8849_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_8849_end_0 = const()[name = tensor("op_8849_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_8849_end_mask_0 = const()[name = tensor("op_8849_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8849_cast_fp16 = slice_by_index(begin = var_8849_begin_0, end = var_8849_end_0, end_mask = var_8849_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8849_cast_fp16")]; tensor var_8853_begin_0 = const()[name = tensor("op_8853_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_8853_end_0 = const()[name = tensor("op_8853_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_8853_end_mask_0 = const()[name = tensor("op_8853_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8853_cast_fp16 = slice_by_index(begin = var_8853_begin_0, end = var_8853_end_0, end_mask = var_8853_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8853_cast_fp16")]; tensor var_8857_begin_0 = const()[name = tensor("op_8857_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_8857_end_0 = const()[name = tensor("op_8857_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_8857_end_mask_0 = const()[name = tensor("op_8857_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8857_cast_fp16 = slice_by_index(begin = var_8857_begin_0, end = var_8857_end_0, end_mask = var_8857_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8857_cast_fp16")]; tensor var_8861_begin_0 = const()[name = tensor("op_8861_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_8861_end_0 = const()[name = tensor("op_8861_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_8861_end_mask_0 = const()[name = tensor("op_8861_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8861_cast_fp16 = slice_by_index(begin = var_8861_begin_0, end = var_8861_end_0, end_mask = var_8861_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8861_cast_fp16")]; tensor var_8865_begin_0 = const()[name = tensor("op_8865_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_8865_end_0 = const()[name = tensor("op_8865_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_8865_end_mask_0 = const()[name = tensor("op_8865_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8865_cast_fp16 = slice_by_index(begin = var_8865_begin_0, end = var_8865_end_0, end_mask = var_8865_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8865_cast_fp16")]; tensor var_8869_begin_0 = const()[name = tensor("op_8869_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_8869_end_0 = const()[name = tensor("op_8869_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_8869_end_mask_0 = const()[name = tensor("op_8869_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8869_cast_fp16 = slice_by_index(begin = var_8869_begin_0, end = var_8869_end_0, end_mask = var_8869_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_8869_cast_fp16")]; tensor var_8872_begin_0 = const()[name = tensor("op_8872_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8872_end_0 = const()[name = tensor("op_8872_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8872_end_mask_0 = const()[name = tensor("op_8872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8872_cast_fp16 = slice_by_index(begin = var_8872_begin_0, end = var_8872_end_0, end_mask = var_8872_end_mask_0, x = var_8825_cast_fp16)[name = tensor("op_8872_cast_fp16")]; tensor var_8873_begin_0 = const()[name = tensor("op_8873_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8873_end_0 = const()[name = tensor("op_8873_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8873_end_mask_0 = const()[name = tensor("op_8873_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8873_cast_fp16 = slice_by_index(begin = var_8873_begin_0, end = var_8873_end_0, end_mask = var_8873_end_mask_0, x = var_8825_cast_fp16)[name = tensor("op_8873_cast_fp16")]; tensor var_8874_begin_0 = const()[name = tensor("op_8874_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8874_end_0 = const()[name = tensor("op_8874_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8874_end_mask_0 = const()[name = tensor("op_8874_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8874_cast_fp16 = slice_by_index(begin = var_8874_begin_0, end = var_8874_end_0, end_mask = var_8874_end_mask_0, x = var_8825_cast_fp16)[name = tensor("op_8874_cast_fp16")]; tensor var_8875_begin_0 = const()[name = tensor("op_8875_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8875_end_0 = const()[name = tensor("op_8875_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8875_end_mask_0 = const()[name = tensor("op_8875_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8875_cast_fp16 = slice_by_index(begin = var_8875_begin_0, end = var_8875_end_0, end_mask = var_8875_end_mask_0, x = var_8825_cast_fp16)[name = tensor("op_8875_cast_fp16")]; tensor var_8876_begin_0 = const()[name = tensor("op_8876_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8876_end_0 = const()[name = tensor("op_8876_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8876_end_mask_0 = const()[name = tensor("op_8876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8876_cast_fp16 = slice_by_index(begin = var_8876_begin_0, end = var_8876_end_0, end_mask = var_8876_end_mask_0, x = var_8825_cast_fp16)[name = tensor("op_8876_cast_fp16")]; tensor var_8877_begin_0 = const()[name = tensor("op_8877_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8877_end_0 = const()[name = tensor("op_8877_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8877_end_mask_0 = const()[name = tensor("op_8877_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8877_cast_fp16 = slice_by_index(begin = var_8877_begin_0, end = var_8877_end_0, end_mask = var_8877_end_mask_0, x = var_8825_cast_fp16)[name = tensor("op_8877_cast_fp16")]; tensor var_8878_begin_0 = const()[name = tensor("op_8878_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8878_end_0 = const()[name = tensor("op_8878_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8878_end_mask_0 = const()[name = tensor("op_8878_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8878_cast_fp16 = slice_by_index(begin = var_8878_begin_0, end = var_8878_end_0, end_mask = var_8878_end_mask_0, x = var_8829_cast_fp16)[name = tensor("op_8878_cast_fp16")]; tensor var_8879_begin_0 = const()[name = tensor("op_8879_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8879_end_0 = const()[name = tensor("op_8879_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8879_end_mask_0 = const()[name = tensor("op_8879_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8879_cast_fp16 = slice_by_index(begin = var_8879_begin_0, end = var_8879_end_0, end_mask = var_8879_end_mask_0, x = var_8829_cast_fp16)[name = tensor("op_8879_cast_fp16")]; tensor var_8880_begin_0 = const()[name = tensor("op_8880_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8880_end_0 = const()[name = tensor("op_8880_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8880_end_mask_0 = const()[name = tensor("op_8880_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8880_cast_fp16 = slice_by_index(begin = var_8880_begin_0, end = var_8880_end_0, end_mask = var_8880_end_mask_0, x = var_8829_cast_fp16)[name = tensor("op_8880_cast_fp16")]; tensor var_8881_begin_0 = const()[name = tensor("op_8881_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8881_end_0 = const()[name = tensor("op_8881_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8881_end_mask_0 = const()[name = tensor("op_8881_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8881_cast_fp16 = slice_by_index(begin = var_8881_begin_0, end = var_8881_end_0, end_mask = var_8881_end_mask_0, x = var_8829_cast_fp16)[name = tensor("op_8881_cast_fp16")]; tensor var_8882_begin_0 = const()[name = tensor("op_8882_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8882_end_0 = const()[name = tensor("op_8882_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8882_end_mask_0 = const()[name = tensor("op_8882_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8882_cast_fp16 = slice_by_index(begin = var_8882_begin_0, end = var_8882_end_0, end_mask = var_8882_end_mask_0, x = var_8829_cast_fp16)[name = tensor("op_8882_cast_fp16")]; tensor var_8883_begin_0 = const()[name = tensor("op_8883_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8883_end_0 = const()[name = tensor("op_8883_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8883_end_mask_0 = const()[name = tensor("op_8883_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8883_cast_fp16 = slice_by_index(begin = var_8883_begin_0, end = var_8883_end_0, end_mask = var_8883_end_mask_0, x = var_8829_cast_fp16)[name = tensor("op_8883_cast_fp16")]; tensor var_8884_begin_0 = const()[name = tensor("op_8884_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8884_end_0 = const()[name = tensor("op_8884_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8884_end_mask_0 = const()[name = tensor("op_8884_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8884_cast_fp16 = slice_by_index(begin = var_8884_begin_0, end = var_8884_end_0, end_mask = var_8884_end_mask_0, x = var_8833_cast_fp16)[name = tensor("op_8884_cast_fp16")]; tensor var_8885_begin_0 = const()[name = tensor("op_8885_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8885_end_0 = const()[name = tensor("op_8885_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8885_end_mask_0 = const()[name = tensor("op_8885_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8885_cast_fp16 = slice_by_index(begin = var_8885_begin_0, end = var_8885_end_0, end_mask = var_8885_end_mask_0, x = var_8833_cast_fp16)[name = tensor("op_8885_cast_fp16")]; tensor var_8886_begin_0 = const()[name = tensor("op_8886_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8886_end_0 = const()[name = tensor("op_8886_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8886_end_mask_0 = const()[name = tensor("op_8886_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8886_cast_fp16 = slice_by_index(begin = var_8886_begin_0, end = var_8886_end_0, end_mask = var_8886_end_mask_0, x = var_8833_cast_fp16)[name = tensor("op_8886_cast_fp16")]; tensor var_8887_begin_0 = const()[name = tensor("op_8887_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8887_end_0 = const()[name = tensor("op_8887_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8887_end_mask_0 = const()[name = tensor("op_8887_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8887_cast_fp16 = slice_by_index(begin = var_8887_begin_0, end = var_8887_end_0, end_mask = var_8887_end_mask_0, x = var_8833_cast_fp16)[name = tensor("op_8887_cast_fp16")]; tensor var_8888_begin_0 = const()[name = tensor("op_8888_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8888_end_0 = const()[name = tensor("op_8888_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8888_end_mask_0 = const()[name = tensor("op_8888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8888_cast_fp16 = slice_by_index(begin = var_8888_begin_0, end = var_8888_end_0, end_mask = var_8888_end_mask_0, x = var_8833_cast_fp16)[name = tensor("op_8888_cast_fp16")]; tensor var_8889_begin_0 = const()[name = tensor("op_8889_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8889_end_0 = const()[name = tensor("op_8889_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8889_end_mask_0 = const()[name = tensor("op_8889_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8889_cast_fp16 = slice_by_index(begin = var_8889_begin_0, end = var_8889_end_0, end_mask = var_8889_end_mask_0, x = var_8833_cast_fp16)[name = tensor("op_8889_cast_fp16")]; tensor var_8890_begin_0 = const()[name = tensor("op_8890_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8890_end_0 = const()[name = tensor("op_8890_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8890_end_mask_0 = const()[name = tensor("op_8890_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8890_cast_fp16 = slice_by_index(begin = var_8890_begin_0, end = var_8890_end_0, end_mask = var_8890_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8890_cast_fp16")]; tensor var_8891_begin_0 = const()[name = tensor("op_8891_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8891_end_0 = const()[name = tensor("op_8891_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8891_end_mask_0 = const()[name = tensor("op_8891_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8891_cast_fp16 = slice_by_index(begin = var_8891_begin_0, end = var_8891_end_0, end_mask = var_8891_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8891_cast_fp16")]; tensor var_8892_begin_0 = const()[name = tensor("op_8892_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8892_end_0 = const()[name = tensor("op_8892_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8892_end_mask_0 = const()[name = tensor("op_8892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8892_cast_fp16 = slice_by_index(begin = var_8892_begin_0, end = var_8892_end_0, end_mask = var_8892_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8892_cast_fp16")]; tensor var_8893_begin_0 = const()[name = tensor("op_8893_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8893_end_0 = const()[name = tensor("op_8893_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8893_end_mask_0 = const()[name = tensor("op_8893_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8893_cast_fp16 = slice_by_index(begin = var_8893_begin_0, end = var_8893_end_0, end_mask = var_8893_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8893_cast_fp16")]; tensor var_8894_begin_0 = const()[name = tensor("op_8894_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8894_end_0 = const()[name = tensor("op_8894_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8894_end_mask_0 = const()[name = tensor("op_8894_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8894_cast_fp16 = slice_by_index(begin = var_8894_begin_0, end = var_8894_end_0, end_mask = var_8894_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8894_cast_fp16")]; tensor var_8895_begin_0 = const()[name = tensor("op_8895_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8895_end_0 = const()[name = tensor("op_8895_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8895_end_mask_0 = const()[name = tensor("op_8895_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8895_cast_fp16 = slice_by_index(begin = var_8895_begin_0, end = var_8895_end_0, end_mask = var_8895_end_mask_0, x = var_8837_cast_fp16)[name = tensor("op_8895_cast_fp16")]; tensor var_8896_begin_0 = const()[name = tensor("op_8896_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8896_end_0 = const()[name = tensor("op_8896_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8896_end_mask_0 = const()[name = tensor("op_8896_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8896_cast_fp16 = slice_by_index(begin = var_8896_begin_0, end = var_8896_end_0, end_mask = var_8896_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8896_cast_fp16")]; tensor var_8897_begin_0 = const()[name = tensor("op_8897_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8897_end_0 = const()[name = tensor("op_8897_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8897_end_mask_0 = const()[name = tensor("op_8897_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8897_cast_fp16 = slice_by_index(begin = var_8897_begin_0, end = var_8897_end_0, end_mask = var_8897_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8897_cast_fp16")]; tensor var_8898_begin_0 = const()[name = tensor("op_8898_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8898_end_0 = const()[name = tensor("op_8898_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8898_end_mask_0 = const()[name = tensor("op_8898_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8898_cast_fp16 = slice_by_index(begin = var_8898_begin_0, end = var_8898_end_0, end_mask = var_8898_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8898_cast_fp16")]; tensor var_8899_begin_0 = const()[name = tensor("op_8899_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8899_end_0 = const()[name = tensor("op_8899_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8899_end_mask_0 = const()[name = tensor("op_8899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8899_cast_fp16 = slice_by_index(begin = var_8899_begin_0, end = var_8899_end_0, end_mask = var_8899_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8899_cast_fp16")]; tensor var_8900_begin_0 = const()[name = tensor("op_8900_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8900_end_0 = const()[name = tensor("op_8900_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8900_end_mask_0 = const()[name = tensor("op_8900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8900_cast_fp16 = slice_by_index(begin = var_8900_begin_0, end = var_8900_end_0, end_mask = var_8900_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8900_cast_fp16")]; tensor var_8901_begin_0 = const()[name = tensor("op_8901_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8901_end_0 = const()[name = tensor("op_8901_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8901_end_mask_0 = const()[name = tensor("op_8901_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8901_cast_fp16 = slice_by_index(begin = var_8901_begin_0, end = var_8901_end_0, end_mask = var_8901_end_mask_0, x = var_8841_cast_fp16)[name = tensor("op_8901_cast_fp16")]; tensor var_8902_begin_0 = const()[name = tensor("op_8902_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8902_end_0 = const()[name = tensor("op_8902_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8902_end_mask_0 = const()[name = tensor("op_8902_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8902_cast_fp16 = slice_by_index(begin = var_8902_begin_0, end = var_8902_end_0, end_mask = var_8902_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8902_cast_fp16")]; tensor var_8903_begin_0 = const()[name = tensor("op_8903_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8903_end_0 = const()[name = tensor("op_8903_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8903_end_mask_0 = const()[name = tensor("op_8903_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8903_cast_fp16 = slice_by_index(begin = var_8903_begin_0, end = var_8903_end_0, end_mask = var_8903_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8903_cast_fp16")]; tensor var_8904_begin_0 = const()[name = tensor("op_8904_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8904_end_0 = const()[name = tensor("op_8904_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8904_end_mask_0 = const()[name = tensor("op_8904_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8904_cast_fp16 = slice_by_index(begin = var_8904_begin_0, end = var_8904_end_0, end_mask = var_8904_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8904_cast_fp16")]; tensor var_8905_begin_0 = const()[name = tensor("op_8905_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8905_end_0 = const()[name = tensor("op_8905_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8905_end_mask_0 = const()[name = tensor("op_8905_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8905_cast_fp16 = slice_by_index(begin = var_8905_begin_0, end = var_8905_end_0, end_mask = var_8905_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8905_cast_fp16")]; tensor var_8906_begin_0 = const()[name = tensor("op_8906_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8906_end_0 = const()[name = tensor("op_8906_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8906_end_mask_0 = const()[name = tensor("op_8906_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8906_cast_fp16 = slice_by_index(begin = var_8906_begin_0, end = var_8906_end_0, end_mask = var_8906_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8906_cast_fp16")]; tensor var_8907_begin_0 = const()[name = tensor("op_8907_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8907_end_0 = const()[name = tensor("op_8907_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8907_end_mask_0 = const()[name = tensor("op_8907_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8907_cast_fp16 = slice_by_index(begin = var_8907_begin_0, end = var_8907_end_0, end_mask = var_8907_end_mask_0, x = var_8845_cast_fp16)[name = tensor("op_8907_cast_fp16")]; tensor var_8908_begin_0 = const()[name = tensor("op_8908_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8908_end_0 = const()[name = tensor("op_8908_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8908_end_mask_0 = const()[name = tensor("op_8908_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8908_cast_fp16 = slice_by_index(begin = var_8908_begin_0, end = var_8908_end_0, end_mask = var_8908_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8908_cast_fp16")]; tensor var_8909_begin_0 = const()[name = tensor("op_8909_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8909_end_0 = const()[name = tensor("op_8909_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8909_end_mask_0 = const()[name = tensor("op_8909_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8909_cast_fp16 = slice_by_index(begin = var_8909_begin_0, end = var_8909_end_0, end_mask = var_8909_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8909_cast_fp16")]; tensor var_8910_begin_0 = const()[name = tensor("op_8910_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8910_end_0 = const()[name = tensor("op_8910_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8910_end_mask_0 = const()[name = tensor("op_8910_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8910_cast_fp16 = slice_by_index(begin = var_8910_begin_0, end = var_8910_end_0, end_mask = var_8910_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8910_cast_fp16")]; tensor var_8911_begin_0 = const()[name = tensor("op_8911_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8911_end_0 = const()[name = tensor("op_8911_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8911_end_mask_0 = const()[name = tensor("op_8911_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8911_cast_fp16 = slice_by_index(begin = var_8911_begin_0, end = var_8911_end_0, end_mask = var_8911_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8911_cast_fp16")]; tensor var_8912_begin_0 = const()[name = tensor("op_8912_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8912_end_0 = const()[name = tensor("op_8912_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8912_end_mask_0 = const()[name = tensor("op_8912_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8912_cast_fp16 = slice_by_index(begin = var_8912_begin_0, end = var_8912_end_0, end_mask = var_8912_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8912_cast_fp16")]; tensor var_8913_begin_0 = const()[name = tensor("op_8913_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8913_end_0 = const()[name = tensor("op_8913_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8913_end_mask_0 = const()[name = tensor("op_8913_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8913_cast_fp16 = slice_by_index(begin = var_8913_begin_0, end = var_8913_end_0, end_mask = var_8913_end_mask_0, x = var_8849_cast_fp16)[name = tensor("op_8913_cast_fp16")]; tensor var_8914_begin_0 = const()[name = tensor("op_8914_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8914_end_0 = const()[name = tensor("op_8914_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8914_end_mask_0 = const()[name = tensor("op_8914_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8914_cast_fp16 = slice_by_index(begin = var_8914_begin_0, end = var_8914_end_0, end_mask = var_8914_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_8914_cast_fp16")]; tensor var_8915_begin_0 = const()[name = tensor("op_8915_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8915_end_0 = const()[name = tensor("op_8915_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8915_end_mask_0 = const()[name = tensor("op_8915_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8915_cast_fp16 = slice_by_index(begin = var_8915_begin_0, end = var_8915_end_0, end_mask = var_8915_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_8915_cast_fp16")]; tensor var_8916_begin_0 = const()[name = tensor("op_8916_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8916_end_0 = const()[name = tensor("op_8916_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8916_end_mask_0 = const()[name = tensor("op_8916_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8916_cast_fp16 = slice_by_index(begin = var_8916_begin_0, end = var_8916_end_0, end_mask = var_8916_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_8916_cast_fp16")]; tensor var_8917_begin_0 = const()[name = tensor("op_8917_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8917_end_0 = const()[name = tensor("op_8917_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8917_end_mask_0 = const()[name = tensor("op_8917_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8917_cast_fp16 = slice_by_index(begin = var_8917_begin_0, end = var_8917_end_0, end_mask = var_8917_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_8917_cast_fp16")]; tensor var_8918_begin_0 = const()[name = tensor("op_8918_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8918_end_0 = const()[name = tensor("op_8918_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8918_end_mask_0 = const()[name = tensor("op_8918_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8918_cast_fp16 = slice_by_index(begin = var_8918_begin_0, end = var_8918_end_0, end_mask = var_8918_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_8918_cast_fp16")]; tensor var_8919_begin_0 = const()[name = tensor("op_8919_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8919_end_0 = const()[name = tensor("op_8919_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8919_end_mask_0 = const()[name = tensor("op_8919_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8919_cast_fp16 = slice_by_index(begin = var_8919_begin_0, end = var_8919_end_0, end_mask = var_8919_end_mask_0, x = var_8853_cast_fp16)[name = tensor("op_8919_cast_fp16")]; tensor var_8920_begin_0 = const()[name = tensor("op_8920_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8920_end_0 = const()[name = tensor("op_8920_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8920_end_mask_0 = const()[name = tensor("op_8920_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8920_cast_fp16 = slice_by_index(begin = var_8920_begin_0, end = var_8920_end_0, end_mask = var_8920_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_8920_cast_fp16")]; tensor var_8921_begin_0 = const()[name = tensor("op_8921_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8921_end_0 = const()[name = tensor("op_8921_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8921_end_mask_0 = const()[name = tensor("op_8921_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8921_cast_fp16 = slice_by_index(begin = var_8921_begin_0, end = var_8921_end_0, end_mask = var_8921_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_8921_cast_fp16")]; tensor var_8922_begin_0 = const()[name = tensor("op_8922_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8922_end_0 = const()[name = tensor("op_8922_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8922_end_mask_0 = const()[name = tensor("op_8922_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8922_cast_fp16 = slice_by_index(begin = var_8922_begin_0, end = var_8922_end_0, end_mask = var_8922_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_8922_cast_fp16")]; tensor var_8923_begin_0 = const()[name = tensor("op_8923_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8923_end_0 = const()[name = tensor("op_8923_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8923_end_mask_0 = const()[name = tensor("op_8923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8923_cast_fp16 = slice_by_index(begin = var_8923_begin_0, end = var_8923_end_0, end_mask = var_8923_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_8923_cast_fp16")]; tensor var_8924_begin_0 = const()[name = tensor("op_8924_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8924_end_0 = const()[name = tensor("op_8924_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8924_end_mask_0 = const()[name = tensor("op_8924_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8924_cast_fp16 = slice_by_index(begin = var_8924_begin_0, end = var_8924_end_0, end_mask = var_8924_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_8924_cast_fp16")]; tensor var_8925_begin_0 = const()[name = tensor("op_8925_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8925_end_0 = const()[name = tensor("op_8925_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8925_end_mask_0 = const()[name = tensor("op_8925_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8925_cast_fp16 = slice_by_index(begin = var_8925_begin_0, end = var_8925_end_0, end_mask = var_8925_end_mask_0, x = var_8857_cast_fp16)[name = tensor("op_8925_cast_fp16")]; tensor var_8926_begin_0 = const()[name = tensor("op_8926_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8926_end_0 = const()[name = tensor("op_8926_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8926_end_mask_0 = const()[name = tensor("op_8926_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8926_cast_fp16 = slice_by_index(begin = var_8926_begin_0, end = var_8926_end_0, end_mask = var_8926_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_8926_cast_fp16")]; tensor var_8927_begin_0 = const()[name = tensor("op_8927_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8927_end_0 = const()[name = tensor("op_8927_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8927_end_mask_0 = const()[name = tensor("op_8927_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8927_cast_fp16 = slice_by_index(begin = var_8927_begin_0, end = var_8927_end_0, end_mask = var_8927_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_8927_cast_fp16")]; tensor var_8928_begin_0 = const()[name = tensor("op_8928_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8928_end_0 = const()[name = tensor("op_8928_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8928_end_mask_0 = const()[name = tensor("op_8928_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8928_cast_fp16 = slice_by_index(begin = var_8928_begin_0, end = var_8928_end_0, end_mask = var_8928_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_8928_cast_fp16")]; tensor var_8929_begin_0 = const()[name = tensor("op_8929_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8929_end_0 = const()[name = tensor("op_8929_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8929_end_mask_0 = const()[name = tensor("op_8929_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8929_cast_fp16 = slice_by_index(begin = var_8929_begin_0, end = var_8929_end_0, end_mask = var_8929_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_8929_cast_fp16")]; tensor var_8930_begin_0 = const()[name = tensor("op_8930_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8930_end_0 = const()[name = tensor("op_8930_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8930_end_mask_0 = const()[name = tensor("op_8930_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8930_cast_fp16 = slice_by_index(begin = var_8930_begin_0, end = var_8930_end_0, end_mask = var_8930_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_8930_cast_fp16")]; tensor var_8931_begin_0 = const()[name = tensor("op_8931_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8931_end_0 = const()[name = tensor("op_8931_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8931_end_mask_0 = const()[name = tensor("op_8931_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8931_cast_fp16 = slice_by_index(begin = var_8931_begin_0, end = var_8931_end_0, end_mask = var_8931_end_mask_0, x = var_8861_cast_fp16)[name = tensor("op_8931_cast_fp16")]; tensor var_8932_begin_0 = const()[name = tensor("op_8932_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8932_end_0 = const()[name = tensor("op_8932_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8932_end_mask_0 = const()[name = tensor("op_8932_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8932_cast_fp16 = slice_by_index(begin = var_8932_begin_0, end = var_8932_end_0, end_mask = var_8932_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_8932_cast_fp16")]; tensor var_8933_begin_0 = const()[name = tensor("op_8933_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8933_end_0 = const()[name = tensor("op_8933_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8933_end_mask_0 = const()[name = tensor("op_8933_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8933_cast_fp16 = slice_by_index(begin = var_8933_begin_0, end = var_8933_end_0, end_mask = var_8933_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_8933_cast_fp16")]; tensor var_8934_begin_0 = const()[name = tensor("op_8934_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8934_end_0 = const()[name = tensor("op_8934_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8934_end_mask_0 = const()[name = tensor("op_8934_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8934_cast_fp16 = slice_by_index(begin = var_8934_begin_0, end = var_8934_end_0, end_mask = var_8934_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_8934_cast_fp16")]; tensor var_8935_begin_0 = const()[name = tensor("op_8935_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8935_end_0 = const()[name = tensor("op_8935_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8935_end_mask_0 = const()[name = tensor("op_8935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8935_cast_fp16 = slice_by_index(begin = var_8935_begin_0, end = var_8935_end_0, end_mask = var_8935_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_8935_cast_fp16")]; tensor var_8936_begin_0 = const()[name = tensor("op_8936_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8936_end_0 = const()[name = tensor("op_8936_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8936_end_mask_0 = const()[name = tensor("op_8936_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8936_cast_fp16 = slice_by_index(begin = var_8936_begin_0, end = var_8936_end_0, end_mask = var_8936_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_8936_cast_fp16")]; tensor var_8937_begin_0 = const()[name = tensor("op_8937_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8937_end_0 = const()[name = tensor("op_8937_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8937_end_mask_0 = const()[name = tensor("op_8937_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8937_cast_fp16 = slice_by_index(begin = var_8937_begin_0, end = var_8937_end_0, end_mask = var_8937_end_mask_0, x = var_8865_cast_fp16)[name = tensor("op_8937_cast_fp16")]; tensor var_8938_begin_0 = const()[name = tensor("op_8938_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8938_end_0 = const()[name = tensor("op_8938_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_8938_end_mask_0 = const()[name = tensor("op_8938_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8938_cast_fp16 = slice_by_index(begin = var_8938_begin_0, end = var_8938_end_0, end_mask = var_8938_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_8938_cast_fp16")]; tensor var_8939_begin_0 = const()[name = tensor("op_8939_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8939_end_0 = const()[name = tensor("op_8939_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_8939_end_mask_0 = const()[name = tensor("op_8939_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8939_cast_fp16 = slice_by_index(begin = var_8939_begin_0, end = var_8939_end_0, end_mask = var_8939_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_8939_cast_fp16")]; tensor var_8940_begin_0 = const()[name = tensor("op_8940_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8940_end_0 = const()[name = tensor("op_8940_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_8940_end_mask_0 = const()[name = tensor("op_8940_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8940_cast_fp16 = slice_by_index(begin = var_8940_begin_0, end = var_8940_end_0, end_mask = var_8940_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_8940_cast_fp16")]; tensor var_8941_begin_0 = const()[name = tensor("op_8941_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8941_end_0 = const()[name = tensor("op_8941_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_8941_end_mask_0 = const()[name = tensor("op_8941_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8941_cast_fp16 = slice_by_index(begin = var_8941_begin_0, end = var_8941_end_0, end_mask = var_8941_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_8941_cast_fp16")]; tensor var_8942_begin_0 = const()[name = tensor("op_8942_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8942_end_0 = const()[name = tensor("op_8942_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_8942_end_mask_0 = const()[name = tensor("op_8942_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8942_cast_fp16 = slice_by_index(begin = var_8942_begin_0, end = var_8942_end_0, end_mask = var_8942_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_8942_cast_fp16")]; tensor var_8943_begin_0 = const()[name = tensor("op_8943_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_8943_end_0 = const()[name = tensor("op_8943_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_8943_end_mask_0 = const()[name = tensor("op_8943_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8943_cast_fp16 = slice_by_index(begin = var_8943_begin_0, end = var_8943_end_0, end_mask = var_8943_end_mask_0, x = var_8869_cast_fp16)[name = tensor("op_8943_cast_fp16")]; tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_8948_begin_0 = const()[name = tensor("op_8948_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8948_end_0 = const()[name = tensor("op_8948_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_8948_end_mask_0 = const()[name = tensor("op_8948_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = key_21_cast_fp16)[name = tensor("transpose_1")]; tensor var_8948_cast_fp16 = slice_by_index(begin = var_8948_begin_0, end = var_8948_end_0, end_mask = var_8948_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8948_cast_fp16")]; tensor var_8952_begin_0 = const()[name = tensor("op_8952_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_8952_end_0 = const()[name = tensor("op_8952_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_8952_end_mask_0 = const()[name = tensor("op_8952_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8952_cast_fp16 = slice_by_index(begin = var_8952_begin_0, end = var_8952_end_0, end_mask = var_8952_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8952_cast_fp16")]; tensor var_8956_begin_0 = const()[name = tensor("op_8956_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_8956_end_0 = const()[name = tensor("op_8956_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_8956_end_mask_0 = const()[name = tensor("op_8956_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8956_cast_fp16 = slice_by_index(begin = var_8956_begin_0, end = var_8956_end_0, end_mask = var_8956_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8956_cast_fp16")]; tensor var_8960_begin_0 = const()[name = tensor("op_8960_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_8960_end_0 = const()[name = tensor("op_8960_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_8960_end_mask_0 = const()[name = tensor("op_8960_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8960_cast_fp16 = slice_by_index(begin = var_8960_begin_0, end = var_8960_end_0, end_mask = var_8960_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8960_cast_fp16")]; tensor var_8964_begin_0 = const()[name = tensor("op_8964_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8964_end_0 = const()[name = tensor("op_8964_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_8964_end_mask_0 = const()[name = tensor("op_8964_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8964_cast_fp16 = slice_by_index(begin = var_8964_begin_0, end = var_8964_end_0, end_mask = var_8964_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8964_cast_fp16")]; tensor var_8968_begin_0 = const()[name = tensor("op_8968_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_8968_end_0 = const()[name = tensor("op_8968_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_8968_end_mask_0 = const()[name = tensor("op_8968_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8968_cast_fp16 = slice_by_index(begin = var_8968_begin_0, end = var_8968_end_0, end_mask = var_8968_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8968_cast_fp16")]; tensor var_8972_begin_0 = const()[name = tensor("op_8972_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_8972_end_0 = const()[name = tensor("op_8972_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_8972_end_mask_0 = const()[name = tensor("op_8972_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8972_cast_fp16 = slice_by_index(begin = var_8972_begin_0, end = var_8972_end_0, end_mask = var_8972_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8972_cast_fp16")]; tensor var_8976_begin_0 = const()[name = tensor("op_8976_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_8976_end_0 = const()[name = tensor("op_8976_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_8976_end_mask_0 = const()[name = tensor("op_8976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8976_cast_fp16 = slice_by_index(begin = var_8976_begin_0, end = var_8976_end_0, end_mask = var_8976_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8976_cast_fp16")]; tensor var_8980_begin_0 = const()[name = tensor("op_8980_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8980_end_0 = const()[name = tensor("op_8980_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_8980_end_mask_0 = const()[name = tensor("op_8980_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8980_cast_fp16 = slice_by_index(begin = var_8980_begin_0, end = var_8980_end_0, end_mask = var_8980_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8980_cast_fp16")]; tensor var_8984_begin_0 = const()[name = tensor("op_8984_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_8984_end_0 = const()[name = tensor("op_8984_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_8984_end_mask_0 = const()[name = tensor("op_8984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8984_cast_fp16 = slice_by_index(begin = var_8984_begin_0, end = var_8984_end_0, end_mask = var_8984_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8984_cast_fp16")]; tensor var_8988_begin_0 = const()[name = tensor("op_8988_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_8988_end_0 = const()[name = tensor("op_8988_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_8988_end_mask_0 = const()[name = tensor("op_8988_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8988_cast_fp16 = slice_by_index(begin = var_8988_begin_0, end = var_8988_end_0, end_mask = var_8988_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8988_cast_fp16")]; tensor var_8992_begin_0 = const()[name = tensor("op_8992_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_8992_end_0 = const()[name = tensor("op_8992_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_8992_end_mask_0 = const()[name = tensor("op_8992_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_8992_cast_fp16 = slice_by_index(begin = var_8992_begin_0, end = var_8992_end_0, end_mask = var_8992_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_8992_cast_fp16")]; tensor var_8994_begin_0 = const()[name = tensor("op_8994_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8994_end_0 = const()[name = tensor("op_8994_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8994_end_mask_0 = const()[name = tensor("op_8994_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8994_cast_fp16 = slice_by_index(begin = var_8994_begin_0, end = var_8994_end_0, end_mask = var_8994_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_8994_cast_fp16")]; tensor var_8998_begin_0 = const()[name = tensor("op_8998_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_8998_end_0 = const()[name = tensor("op_8998_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_8998_end_mask_0 = const()[name = tensor("op_8998_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8998_cast_fp16 = slice_by_index(begin = var_8998_begin_0, end = var_8998_end_0, end_mask = var_8998_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_8998_cast_fp16")]; tensor var_9002_begin_0 = const()[name = tensor("op_9002_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_9002_end_0 = const()[name = tensor("op_9002_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_9002_end_mask_0 = const()[name = tensor("op_9002_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9002_cast_fp16 = slice_by_index(begin = var_9002_begin_0, end = var_9002_end_0, end_mask = var_9002_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9002_cast_fp16")]; tensor var_9006_begin_0 = const()[name = tensor("op_9006_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_9006_end_0 = const()[name = tensor("op_9006_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_9006_end_mask_0 = const()[name = tensor("op_9006_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9006_cast_fp16 = slice_by_index(begin = var_9006_begin_0, end = var_9006_end_0, end_mask = var_9006_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9006_cast_fp16")]; tensor var_9010_begin_0 = const()[name = tensor("op_9010_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_9010_end_0 = const()[name = tensor("op_9010_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_9010_end_mask_0 = const()[name = tensor("op_9010_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9010_cast_fp16 = slice_by_index(begin = var_9010_begin_0, end = var_9010_end_0, end_mask = var_9010_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9010_cast_fp16")]; tensor var_9014_begin_0 = const()[name = tensor("op_9014_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9014_end_0 = const()[name = tensor("op_9014_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_9014_end_mask_0 = const()[name = tensor("op_9014_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9014_cast_fp16 = slice_by_index(begin = var_9014_begin_0, end = var_9014_end_0, end_mask = var_9014_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9014_cast_fp16")]; tensor var_9018_begin_0 = const()[name = tensor("op_9018_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_9018_end_0 = const()[name = tensor("op_9018_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_9018_end_mask_0 = const()[name = tensor("op_9018_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9018_cast_fp16 = slice_by_index(begin = var_9018_begin_0, end = var_9018_end_0, end_mask = var_9018_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9018_cast_fp16")]; tensor var_9022_begin_0 = const()[name = tensor("op_9022_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_9022_end_0 = const()[name = tensor("op_9022_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_9022_end_mask_0 = const()[name = tensor("op_9022_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9022_cast_fp16 = slice_by_index(begin = var_9022_begin_0, end = var_9022_end_0, end_mask = var_9022_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9022_cast_fp16")]; tensor var_9026_begin_0 = const()[name = tensor("op_9026_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_9026_end_0 = const()[name = tensor("op_9026_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_9026_end_mask_0 = const()[name = tensor("op_9026_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9026_cast_fp16 = slice_by_index(begin = var_9026_begin_0, end = var_9026_end_0, end_mask = var_9026_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9026_cast_fp16")]; tensor var_9030_begin_0 = const()[name = tensor("op_9030_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_9030_end_0 = const()[name = tensor("op_9030_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_9030_end_mask_0 = const()[name = tensor("op_9030_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9030_cast_fp16 = slice_by_index(begin = var_9030_begin_0, end = var_9030_end_0, end_mask = var_9030_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9030_cast_fp16")]; tensor var_9034_begin_0 = const()[name = tensor("op_9034_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_9034_end_0 = const()[name = tensor("op_9034_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_9034_end_mask_0 = const()[name = tensor("op_9034_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9034_cast_fp16 = slice_by_index(begin = var_9034_begin_0, end = var_9034_end_0, end_mask = var_9034_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9034_cast_fp16")]; tensor var_9038_begin_0 = const()[name = tensor("op_9038_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_9038_end_0 = const()[name = tensor("op_9038_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_9038_end_mask_0 = const()[name = tensor("op_9038_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9038_cast_fp16 = slice_by_index(begin = var_9038_begin_0, end = var_9038_end_0, end_mask = var_9038_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_9038_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1441_equation_0, values = (var_8948_cast_fp16, var_8872_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1443_equation_0, values = (var_8948_cast_fp16, var_8873_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1445_equation_0, values = (var_8948_cast_fp16, var_8874_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1447_equation_0, values = (var_8948_cast_fp16, var_8875_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1449_equation_0, values = (var_8948_cast_fp16, var_8876_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1451_equation_0, values = (var_8948_cast_fp16, var_8877_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1453_equation_0, values = (var_8952_cast_fp16, var_8878_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1455_equation_0, values = (var_8952_cast_fp16, var_8879_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1457_equation_0, values = (var_8952_cast_fp16, var_8880_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1459_equation_0, values = (var_8952_cast_fp16, var_8881_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1461_equation_0, values = (var_8952_cast_fp16, var_8882_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1463_equation_0, values = (var_8952_cast_fp16, var_8883_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1465_equation_0, values = (var_8956_cast_fp16, var_8884_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1467_equation_0, values = (var_8956_cast_fp16, var_8885_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1469_equation_0, values = (var_8956_cast_fp16, var_8886_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1471_equation_0, values = (var_8956_cast_fp16, var_8887_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1473_equation_0, values = (var_8956_cast_fp16, var_8888_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1475_equation_0, values = (var_8956_cast_fp16, var_8889_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1477_equation_0, values = (var_8960_cast_fp16, var_8890_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1479_equation_0, values = (var_8960_cast_fp16, var_8891_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1479_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1481_equation_0, values = (var_8960_cast_fp16, var_8892_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1483_equation_0, values = (var_8960_cast_fp16, var_8893_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1485_equation_0, values = (var_8960_cast_fp16, var_8894_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1487_equation_0, values = (var_8960_cast_fp16, var_8895_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1489_equation_0, values = (var_8964_cast_fp16, var_8896_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1491_equation_0, values = (var_8964_cast_fp16, var_8897_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1493_equation_0, values = (var_8964_cast_fp16, var_8898_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1495_equation_0, values = (var_8964_cast_fp16, var_8899_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1497_equation_0, values = (var_8964_cast_fp16, var_8900_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1499_equation_0, values = (var_8964_cast_fp16, var_8901_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1501_equation_0, values = (var_8968_cast_fp16, var_8902_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1503_equation_0, values = (var_8968_cast_fp16, var_8903_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1505_equation_0, values = (var_8968_cast_fp16, var_8904_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1507_equation_0, values = (var_8968_cast_fp16, var_8905_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1509_equation_0, values = (var_8968_cast_fp16, var_8906_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1511_equation_0, values = (var_8968_cast_fp16, var_8907_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1513_equation_0, values = (var_8972_cast_fp16, var_8908_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1515_equation_0, values = (var_8972_cast_fp16, var_8909_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1517_equation_0, values = (var_8972_cast_fp16, var_8910_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1519_equation_0, values = (var_8972_cast_fp16, var_8911_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1521_equation_0, values = (var_8972_cast_fp16, var_8912_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1523_equation_0, values = (var_8972_cast_fp16, var_8913_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1525_equation_0, values = (var_8976_cast_fp16, var_8914_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1527_equation_0, values = (var_8976_cast_fp16, var_8915_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1529_equation_0, values = (var_8976_cast_fp16, var_8916_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1531_equation_0, values = (var_8976_cast_fp16, var_8917_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1533_equation_0, values = (var_8976_cast_fp16, var_8918_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1535_equation_0, values = (var_8976_cast_fp16, var_8919_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1537_equation_0, values = (var_8980_cast_fp16, var_8920_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1539_equation_0, values = (var_8980_cast_fp16, var_8921_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1541_equation_0, values = (var_8980_cast_fp16, var_8922_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1543_equation_0, values = (var_8980_cast_fp16, var_8923_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1545_equation_0, values = (var_8980_cast_fp16, var_8924_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1547_equation_0, values = (var_8980_cast_fp16, var_8925_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1549_equation_0, values = (var_8984_cast_fp16, var_8926_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1551_equation_0, values = (var_8984_cast_fp16, var_8927_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1553_equation_0, values = (var_8984_cast_fp16, var_8928_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1555_equation_0, values = (var_8984_cast_fp16, var_8929_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1557_equation_0, values = (var_8984_cast_fp16, var_8930_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1559_equation_0, values = (var_8984_cast_fp16, var_8931_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1561_equation_0, values = (var_8988_cast_fp16, var_8932_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1563_equation_0, values = (var_8988_cast_fp16, var_8933_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1565_equation_0, values = (var_8988_cast_fp16, var_8934_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1567_equation_0, values = (var_8988_cast_fp16, var_8935_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1569_equation_0, values = (var_8988_cast_fp16, var_8936_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1571_equation_0, values = (var_8988_cast_fp16, var_8937_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1573_equation_0, values = (var_8992_cast_fp16, var_8938_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1575_equation_0, values = (var_8992_cast_fp16, var_8939_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1575_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1577_equation_0, values = (var_8992_cast_fp16, var_8940_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1579_equation_0, values = (var_8992_cast_fp16, var_8941_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1581_equation_0, values = (var_8992_cast_fp16, var_8942_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1583_equation_0, values = (var_8992_cast_fp16, var_8943_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1583_cast_fp16")]; tensor var_9185_to_fp16 = const()[name = tensor("op_9185_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1441_cast_fp16, y = var_9185_to_fp16)[name = tensor("aw_chunk_1441_cast_fp16")]; tensor var_9187_to_fp16 = const()[name = tensor("op_9187_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1443_cast_fp16, y = var_9187_to_fp16)[name = tensor("aw_chunk_1443_cast_fp16")]; tensor var_9189_to_fp16 = const()[name = tensor("op_9189_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1445_cast_fp16, y = var_9189_to_fp16)[name = tensor("aw_chunk_1445_cast_fp16")]; tensor var_9191_to_fp16 = const()[name = tensor("op_9191_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1447_cast_fp16, y = var_9191_to_fp16)[name = tensor("aw_chunk_1447_cast_fp16")]; tensor var_9193_to_fp16 = const()[name = tensor("op_9193_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1449_cast_fp16, y = var_9193_to_fp16)[name = tensor("aw_chunk_1449_cast_fp16")]; tensor var_9195_to_fp16 = const()[name = tensor("op_9195_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1451_cast_fp16, y = var_9195_to_fp16)[name = tensor("aw_chunk_1451_cast_fp16")]; tensor var_9197_to_fp16 = const()[name = tensor("op_9197_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1453_cast_fp16, y = var_9197_to_fp16)[name = tensor("aw_chunk_1453_cast_fp16")]; tensor var_9199_to_fp16 = const()[name = tensor("op_9199_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1455_cast_fp16, y = var_9199_to_fp16)[name = tensor("aw_chunk_1455_cast_fp16")]; tensor var_9201_to_fp16 = const()[name = tensor("op_9201_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1457_cast_fp16, y = var_9201_to_fp16)[name = tensor("aw_chunk_1457_cast_fp16")]; tensor var_9203_to_fp16 = const()[name = tensor("op_9203_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1459_cast_fp16, y = var_9203_to_fp16)[name = tensor("aw_chunk_1459_cast_fp16")]; tensor var_9205_to_fp16 = const()[name = tensor("op_9205_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1461_cast_fp16, y = var_9205_to_fp16)[name = tensor("aw_chunk_1461_cast_fp16")]; tensor var_9207_to_fp16 = const()[name = tensor("op_9207_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1463_cast_fp16, y = var_9207_to_fp16)[name = tensor("aw_chunk_1463_cast_fp16")]; tensor var_9209_to_fp16 = const()[name = tensor("op_9209_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1465_cast_fp16, y = var_9209_to_fp16)[name = tensor("aw_chunk_1465_cast_fp16")]; tensor var_9211_to_fp16 = const()[name = tensor("op_9211_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1467_cast_fp16, y = var_9211_to_fp16)[name = tensor("aw_chunk_1467_cast_fp16")]; tensor var_9213_to_fp16 = const()[name = tensor("op_9213_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1469_cast_fp16, y = var_9213_to_fp16)[name = tensor("aw_chunk_1469_cast_fp16")]; tensor var_9215_to_fp16 = const()[name = tensor("op_9215_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1471_cast_fp16, y = var_9215_to_fp16)[name = tensor("aw_chunk_1471_cast_fp16")]; tensor var_9217_to_fp16 = const()[name = tensor("op_9217_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1473_cast_fp16, y = var_9217_to_fp16)[name = tensor("aw_chunk_1473_cast_fp16")]; tensor var_9219_to_fp16 = const()[name = tensor("op_9219_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1475_cast_fp16, y = var_9219_to_fp16)[name = tensor("aw_chunk_1475_cast_fp16")]; tensor var_9221_to_fp16 = const()[name = tensor("op_9221_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1477_cast_fp16, y = var_9221_to_fp16)[name = tensor("aw_chunk_1477_cast_fp16")]; tensor var_9223_to_fp16 = const()[name = tensor("op_9223_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1479_cast_fp16, y = var_9223_to_fp16)[name = tensor("aw_chunk_1479_cast_fp16")]; tensor var_9225_to_fp16 = const()[name = tensor("op_9225_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1481_cast_fp16, y = var_9225_to_fp16)[name = tensor("aw_chunk_1481_cast_fp16")]; tensor var_9227_to_fp16 = const()[name = tensor("op_9227_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1483_cast_fp16, y = var_9227_to_fp16)[name = tensor("aw_chunk_1483_cast_fp16")]; tensor var_9229_to_fp16 = const()[name = tensor("op_9229_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1485_cast_fp16, y = var_9229_to_fp16)[name = tensor("aw_chunk_1485_cast_fp16")]; tensor var_9231_to_fp16 = const()[name = tensor("op_9231_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1487_cast_fp16, y = var_9231_to_fp16)[name = tensor("aw_chunk_1487_cast_fp16")]; tensor var_9233_to_fp16 = const()[name = tensor("op_9233_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1489_cast_fp16, y = var_9233_to_fp16)[name = tensor("aw_chunk_1489_cast_fp16")]; tensor var_9235_to_fp16 = const()[name = tensor("op_9235_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1491_cast_fp16, y = var_9235_to_fp16)[name = tensor("aw_chunk_1491_cast_fp16")]; tensor var_9237_to_fp16 = const()[name = tensor("op_9237_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1493_cast_fp16, y = var_9237_to_fp16)[name = tensor("aw_chunk_1493_cast_fp16")]; tensor var_9239_to_fp16 = const()[name = tensor("op_9239_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1495_cast_fp16, y = var_9239_to_fp16)[name = tensor("aw_chunk_1495_cast_fp16")]; tensor var_9241_to_fp16 = const()[name = tensor("op_9241_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1497_cast_fp16, y = var_9241_to_fp16)[name = tensor("aw_chunk_1497_cast_fp16")]; tensor var_9243_to_fp16 = const()[name = tensor("op_9243_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1499_cast_fp16, y = var_9243_to_fp16)[name = tensor("aw_chunk_1499_cast_fp16")]; tensor var_9245_to_fp16 = const()[name = tensor("op_9245_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1501_cast_fp16, y = var_9245_to_fp16)[name = tensor("aw_chunk_1501_cast_fp16")]; tensor var_9247_to_fp16 = const()[name = tensor("op_9247_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1503_cast_fp16, y = var_9247_to_fp16)[name = tensor("aw_chunk_1503_cast_fp16")]; tensor var_9249_to_fp16 = const()[name = tensor("op_9249_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1505_cast_fp16, y = var_9249_to_fp16)[name = tensor("aw_chunk_1505_cast_fp16")]; tensor var_9251_to_fp16 = const()[name = tensor("op_9251_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1507_cast_fp16, y = var_9251_to_fp16)[name = tensor("aw_chunk_1507_cast_fp16")]; tensor var_9253_to_fp16 = const()[name = tensor("op_9253_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1509_cast_fp16, y = var_9253_to_fp16)[name = tensor("aw_chunk_1509_cast_fp16")]; tensor var_9255_to_fp16 = const()[name = tensor("op_9255_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1511_cast_fp16, y = var_9255_to_fp16)[name = tensor("aw_chunk_1511_cast_fp16")]; tensor var_9257_to_fp16 = const()[name = tensor("op_9257_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1513_cast_fp16, y = var_9257_to_fp16)[name = tensor("aw_chunk_1513_cast_fp16")]; tensor var_9259_to_fp16 = const()[name = tensor("op_9259_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1515_cast_fp16, y = var_9259_to_fp16)[name = tensor("aw_chunk_1515_cast_fp16")]; tensor var_9261_to_fp16 = const()[name = tensor("op_9261_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1517_cast_fp16, y = var_9261_to_fp16)[name = tensor("aw_chunk_1517_cast_fp16")]; tensor var_9263_to_fp16 = const()[name = tensor("op_9263_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1519_cast_fp16, y = var_9263_to_fp16)[name = tensor("aw_chunk_1519_cast_fp16")]; tensor var_9265_to_fp16 = const()[name = tensor("op_9265_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1521_cast_fp16, y = var_9265_to_fp16)[name = tensor("aw_chunk_1521_cast_fp16")]; tensor var_9267_to_fp16 = const()[name = tensor("op_9267_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1523_cast_fp16, y = var_9267_to_fp16)[name = tensor("aw_chunk_1523_cast_fp16")]; tensor var_9269_to_fp16 = const()[name = tensor("op_9269_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1525_cast_fp16, y = var_9269_to_fp16)[name = tensor("aw_chunk_1525_cast_fp16")]; tensor var_9271_to_fp16 = const()[name = tensor("op_9271_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1527_cast_fp16, y = var_9271_to_fp16)[name = tensor("aw_chunk_1527_cast_fp16")]; tensor var_9273_to_fp16 = const()[name = tensor("op_9273_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1529_cast_fp16, y = var_9273_to_fp16)[name = tensor("aw_chunk_1529_cast_fp16")]; tensor var_9275_to_fp16 = const()[name = tensor("op_9275_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1531_cast_fp16, y = var_9275_to_fp16)[name = tensor("aw_chunk_1531_cast_fp16")]; tensor var_9277_to_fp16 = const()[name = tensor("op_9277_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1533_cast_fp16, y = var_9277_to_fp16)[name = tensor("aw_chunk_1533_cast_fp16")]; tensor var_9279_to_fp16 = const()[name = tensor("op_9279_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1535_cast_fp16, y = var_9279_to_fp16)[name = tensor("aw_chunk_1535_cast_fp16")]; tensor var_9281_to_fp16 = const()[name = tensor("op_9281_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1537_cast_fp16, y = var_9281_to_fp16)[name = tensor("aw_chunk_1537_cast_fp16")]; tensor var_9283_to_fp16 = const()[name = tensor("op_9283_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1539_cast_fp16, y = var_9283_to_fp16)[name = tensor("aw_chunk_1539_cast_fp16")]; tensor var_9285_to_fp16 = const()[name = tensor("op_9285_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1541_cast_fp16, y = var_9285_to_fp16)[name = tensor("aw_chunk_1541_cast_fp16")]; tensor var_9287_to_fp16 = const()[name = tensor("op_9287_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1543_cast_fp16, y = var_9287_to_fp16)[name = tensor("aw_chunk_1543_cast_fp16")]; tensor var_9289_to_fp16 = const()[name = tensor("op_9289_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1545_cast_fp16, y = var_9289_to_fp16)[name = tensor("aw_chunk_1545_cast_fp16")]; tensor var_9291_to_fp16 = const()[name = tensor("op_9291_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1547_cast_fp16, y = var_9291_to_fp16)[name = tensor("aw_chunk_1547_cast_fp16")]; tensor var_9293_to_fp16 = const()[name = tensor("op_9293_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1549_cast_fp16, y = var_9293_to_fp16)[name = tensor("aw_chunk_1549_cast_fp16")]; tensor var_9295_to_fp16 = const()[name = tensor("op_9295_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1551_cast_fp16, y = var_9295_to_fp16)[name = tensor("aw_chunk_1551_cast_fp16")]; tensor var_9297_to_fp16 = const()[name = tensor("op_9297_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1553_cast_fp16, y = var_9297_to_fp16)[name = tensor("aw_chunk_1553_cast_fp16")]; tensor var_9299_to_fp16 = const()[name = tensor("op_9299_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1555_cast_fp16, y = var_9299_to_fp16)[name = tensor("aw_chunk_1555_cast_fp16")]; tensor var_9301_to_fp16 = const()[name = tensor("op_9301_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1557_cast_fp16, y = var_9301_to_fp16)[name = tensor("aw_chunk_1557_cast_fp16")]; tensor var_9303_to_fp16 = const()[name = tensor("op_9303_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1559_cast_fp16, y = var_9303_to_fp16)[name = tensor("aw_chunk_1559_cast_fp16")]; tensor var_9305_to_fp16 = const()[name = tensor("op_9305_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1561_cast_fp16, y = var_9305_to_fp16)[name = tensor("aw_chunk_1561_cast_fp16")]; tensor var_9307_to_fp16 = const()[name = tensor("op_9307_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1563_cast_fp16, y = var_9307_to_fp16)[name = tensor("aw_chunk_1563_cast_fp16")]; tensor var_9309_to_fp16 = const()[name = tensor("op_9309_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1565_cast_fp16, y = var_9309_to_fp16)[name = tensor("aw_chunk_1565_cast_fp16")]; tensor var_9311_to_fp16 = const()[name = tensor("op_9311_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1567_cast_fp16, y = var_9311_to_fp16)[name = tensor("aw_chunk_1567_cast_fp16")]; tensor var_9313_to_fp16 = const()[name = tensor("op_9313_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1569_cast_fp16, y = var_9313_to_fp16)[name = tensor("aw_chunk_1569_cast_fp16")]; tensor var_9315_to_fp16 = const()[name = tensor("op_9315_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1571_cast_fp16, y = var_9315_to_fp16)[name = tensor("aw_chunk_1571_cast_fp16")]; tensor var_9317_to_fp16 = const()[name = tensor("op_9317_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1573_cast_fp16, y = var_9317_to_fp16)[name = tensor("aw_chunk_1573_cast_fp16")]; tensor var_9319_to_fp16 = const()[name = tensor("op_9319_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1575_cast_fp16, y = var_9319_to_fp16)[name = tensor("aw_chunk_1575_cast_fp16")]; tensor var_9321_to_fp16 = const()[name = tensor("op_9321_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1577_cast_fp16, y = var_9321_to_fp16)[name = tensor("aw_chunk_1577_cast_fp16")]; tensor var_9323_to_fp16 = const()[name = tensor("op_9323_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1579_cast_fp16, y = var_9323_to_fp16)[name = tensor("aw_chunk_1579_cast_fp16")]; tensor var_9325_to_fp16 = const()[name = tensor("op_9325_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1581_cast_fp16, y = var_9325_to_fp16)[name = tensor("aw_chunk_1581_cast_fp16")]; tensor var_9327_to_fp16 = const()[name = tensor("op_9327_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1583_cast_fp16, y = var_9327_to_fp16)[name = tensor("aw_chunk_1583_cast_fp16")]; tensor var_9329_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1441_cast_fp16)[name = tensor("op_9329_cast_fp16")]; tensor var_9330_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1443_cast_fp16)[name = tensor("op_9330_cast_fp16")]; tensor var_9331_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1445_cast_fp16)[name = tensor("op_9331_cast_fp16")]; tensor var_9332_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1447_cast_fp16)[name = tensor("op_9332_cast_fp16")]; tensor var_9333_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1449_cast_fp16)[name = tensor("op_9333_cast_fp16")]; tensor var_9334_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1451_cast_fp16)[name = tensor("op_9334_cast_fp16")]; tensor var_9335_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1453_cast_fp16)[name = tensor("op_9335_cast_fp16")]; tensor var_9336_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1455_cast_fp16)[name = tensor("op_9336_cast_fp16")]; tensor var_9337_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1457_cast_fp16)[name = tensor("op_9337_cast_fp16")]; tensor var_9338_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1459_cast_fp16)[name = tensor("op_9338_cast_fp16")]; tensor var_9339_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1461_cast_fp16)[name = tensor("op_9339_cast_fp16")]; tensor var_9340_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1463_cast_fp16)[name = tensor("op_9340_cast_fp16")]; tensor var_9341_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1465_cast_fp16)[name = tensor("op_9341_cast_fp16")]; tensor var_9342_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1467_cast_fp16)[name = tensor("op_9342_cast_fp16")]; tensor var_9343_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1469_cast_fp16)[name = tensor("op_9343_cast_fp16")]; tensor var_9344_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1471_cast_fp16)[name = tensor("op_9344_cast_fp16")]; tensor var_9345_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1473_cast_fp16)[name = tensor("op_9345_cast_fp16")]; tensor var_9346_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1475_cast_fp16)[name = tensor("op_9346_cast_fp16")]; tensor var_9347_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1477_cast_fp16)[name = tensor("op_9347_cast_fp16")]; tensor var_9348_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1479_cast_fp16)[name = tensor("op_9348_cast_fp16")]; tensor var_9349_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1481_cast_fp16)[name = tensor("op_9349_cast_fp16")]; tensor var_9350_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1483_cast_fp16)[name = tensor("op_9350_cast_fp16")]; tensor var_9351_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1485_cast_fp16)[name = tensor("op_9351_cast_fp16")]; tensor var_9352_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1487_cast_fp16)[name = tensor("op_9352_cast_fp16")]; tensor var_9353_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1489_cast_fp16)[name = tensor("op_9353_cast_fp16")]; tensor var_9354_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1491_cast_fp16)[name = tensor("op_9354_cast_fp16")]; tensor var_9355_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1493_cast_fp16)[name = tensor("op_9355_cast_fp16")]; tensor var_9356_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1495_cast_fp16)[name = tensor("op_9356_cast_fp16")]; tensor var_9357_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1497_cast_fp16)[name = tensor("op_9357_cast_fp16")]; tensor var_9358_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1499_cast_fp16)[name = tensor("op_9358_cast_fp16")]; tensor var_9359_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1501_cast_fp16)[name = tensor("op_9359_cast_fp16")]; tensor var_9360_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1503_cast_fp16)[name = tensor("op_9360_cast_fp16")]; tensor var_9361_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1505_cast_fp16)[name = tensor("op_9361_cast_fp16")]; tensor var_9362_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1507_cast_fp16)[name = tensor("op_9362_cast_fp16")]; tensor var_9363_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1509_cast_fp16)[name = tensor("op_9363_cast_fp16")]; tensor var_9364_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1511_cast_fp16)[name = tensor("op_9364_cast_fp16")]; tensor var_9365_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1513_cast_fp16)[name = tensor("op_9365_cast_fp16")]; tensor var_9366_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1515_cast_fp16)[name = tensor("op_9366_cast_fp16")]; tensor var_9367_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1517_cast_fp16)[name = tensor("op_9367_cast_fp16")]; tensor var_9368_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1519_cast_fp16)[name = tensor("op_9368_cast_fp16")]; tensor var_9369_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1521_cast_fp16)[name = tensor("op_9369_cast_fp16")]; tensor var_9370_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1523_cast_fp16)[name = tensor("op_9370_cast_fp16")]; tensor var_9371_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1525_cast_fp16)[name = tensor("op_9371_cast_fp16")]; tensor var_9372_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1527_cast_fp16)[name = tensor("op_9372_cast_fp16")]; tensor var_9373_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1529_cast_fp16)[name = tensor("op_9373_cast_fp16")]; tensor var_9374_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1531_cast_fp16)[name = tensor("op_9374_cast_fp16")]; tensor var_9375_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1533_cast_fp16)[name = tensor("op_9375_cast_fp16")]; tensor var_9376_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1535_cast_fp16)[name = tensor("op_9376_cast_fp16")]; tensor var_9377_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1537_cast_fp16)[name = tensor("op_9377_cast_fp16")]; tensor var_9378_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1539_cast_fp16)[name = tensor("op_9378_cast_fp16")]; tensor var_9379_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1541_cast_fp16)[name = tensor("op_9379_cast_fp16")]; tensor var_9380_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1543_cast_fp16)[name = tensor("op_9380_cast_fp16")]; tensor var_9381_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1545_cast_fp16)[name = tensor("op_9381_cast_fp16")]; tensor var_9382_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1547_cast_fp16)[name = tensor("op_9382_cast_fp16")]; tensor var_9383_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1549_cast_fp16)[name = tensor("op_9383_cast_fp16")]; tensor var_9384_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1551_cast_fp16)[name = tensor("op_9384_cast_fp16")]; tensor var_9385_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1553_cast_fp16)[name = tensor("op_9385_cast_fp16")]; tensor var_9386_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1555_cast_fp16)[name = tensor("op_9386_cast_fp16")]; tensor var_9387_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1557_cast_fp16)[name = tensor("op_9387_cast_fp16")]; tensor var_9388_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1559_cast_fp16)[name = tensor("op_9388_cast_fp16")]; tensor var_9389_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1561_cast_fp16)[name = tensor("op_9389_cast_fp16")]; tensor var_9390_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1563_cast_fp16)[name = tensor("op_9390_cast_fp16")]; tensor var_9391_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1565_cast_fp16)[name = tensor("op_9391_cast_fp16")]; tensor var_9392_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1567_cast_fp16)[name = tensor("op_9392_cast_fp16")]; tensor var_9393_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1569_cast_fp16)[name = tensor("op_9393_cast_fp16")]; tensor var_9394_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1571_cast_fp16)[name = tensor("op_9394_cast_fp16")]; tensor var_9395_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1573_cast_fp16)[name = tensor("op_9395_cast_fp16")]; tensor var_9396_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1575_cast_fp16)[name = tensor("op_9396_cast_fp16")]; tensor var_9397_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1577_cast_fp16)[name = tensor("op_9397_cast_fp16")]; tensor var_9398_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1579_cast_fp16)[name = tensor("op_9398_cast_fp16")]; tensor var_9399_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1581_cast_fp16)[name = tensor("op_9399_cast_fp16")]; tensor var_9400_cast_fp16 = softmax(axis = var_8773, x = aw_chunk_1583_cast_fp16)[name = tensor("op_9400_cast_fp16")]; tensor var_9402_equation_0 = const()[name = tensor("op_9402_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9402_cast_fp16 = einsum(equation = var_9402_equation_0, values = (var_8994_cast_fp16, var_9329_cast_fp16))[name = tensor("op_9402_cast_fp16")]; tensor var_9404_equation_0 = const()[name = tensor("op_9404_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9404_cast_fp16 = einsum(equation = var_9404_equation_0, values = (var_8994_cast_fp16, var_9330_cast_fp16))[name = tensor("op_9404_cast_fp16")]; tensor var_9406_equation_0 = const()[name = tensor("op_9406_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9406_cast_fp16 = einsum(equation = var_9406_equation_0, values = (var_8994_cast_fp16, var_9331_cast_fp16))[name = tensor("op_9406_cast_fp16")]; tensor var_9408_equation_0 = const()[name = tensor("op_9408_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9408_cast_fp16 = einsum(equation = var_9408_equation_0, values = (var_8994_cast_fp16, var_9332_cast_fp16))[name = tensor("op_9408_cast_fp16")]; tensor var_9410_equation_0 = const()[name = tensor("op_9410_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9410_cast_fp16 = einsum(equation = var_9410_equation_0, values = (var_8994_cast_fp16, var_9333_cast_fp16))[name = tensor("op_9410_cast_fp16")]; tensor var_9412_equation_0 = const()[name = tensor("op_9412_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9412_cast_fp16 = einsum(equation = var_9412_equation_0, values = (var_8994_cast_fp16, var_9334_cast_fp16))[name = tensor("op_9412_cast_fp16")]; tensor var_9414_equation_0 = const()[name = tensor("op_9414_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9414_cast_fp16 = einsum(equation = var_9414_equation_0, values = (var_8998_cast_fp16, var_9335_cast_fp16))[name = tensor("op_9414_cast_fp16")]; tensor var_9416_equation_0 = const()[name = tensor("op_9416_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9416_cast_fp16 = einsum(equation = var_9416_equation_0, values = (var_8998_cast_fp16, var_9336_cast_fp16))[name = tensor("op_9416_cast_fp16")]; tensor var_9418_equation_0 = const()[name = tensor("op_9418_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9418_cast_fp16 = einsum(equation = var_9418_equation_0, values = (var_8998_cast_fp16, var_9337_cast_fp16))[name = tensor("op_9418_cast_fp16")]; tensor var_9420_equation_0 = const()[name = tensor("op_9420_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9420_cast_fp16 = einsum(equation = var_9420_equation_0, values = (var_8998_cast_fp16, var_9338_cast_fp16))[name = tensor("op_9420_cast_fp16")]; tensor var_9422_equation_0 = const()[name = tensor("op_9422_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9422_cast_fp16 = einsum(equation = var_9422_equation_0, values = (var_8998_cast_fp16, var_9339_cast_fp16))[name = tensor("op_9422_cast_fp16")]; tensor var_9424_equation_0 = const()[name = tensor("op_9424_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9424_cast_fp16 = einsum(equation = var_9424_equation_0, values = (var_8998_cast_fp16, var_9340_cast_fp16))[name = tensor("op_9424_cast_fp16")]; tensor var_9426_equation_0 = const()[name = tensor("op_9426_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9426_cast_fp16 = einsum(equation = var_9426_equation_0, values = (var_9002_cast_fp16, var_9341_cast_fp16))[name = tensor("op_9426_cast_fp16")]; tensor var_9428_equation_0 = const()[name = tensor("op_9428_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9428_cast_fp16 = einsum(equation = var_9428_equation_0, values = (var_9002_cast_fp16, var_9342_cast_fp16))[name = tensor("op_9428_cast_fp16")]; tensor var_9430_equation_0 = const()[name = tensor("op_9430_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9430_cast_fp16 = einsum(equation = var_9430_equation_0, values = (var_9002_cast_fp16, var_9343_cast_fp16))[name = tensor("op_9430_cast_fp16")]; tensor var_9432_equation_0 = const()[name = tensor("op_9432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9432_cast_fp16 = einsum(equation = var_9432_equation_0, values = (var_9002_cast_fp16, var_9344_cast_fp16))[name = tensor("op_9432_cast_fp16")]; tensor var_9434_equation_0 = const()[name = tensor("op_9434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9434_cast_fp16 = einsum(equation = var_9434_equation_0, values = (var_9002_cast_fp16, var_9345_cast_fp16))[name = tensor("op_9434_cast_fp16")]; tensor var_9436_equation_0 = const()[name = tensor("op_9436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9436_cast_fp16 = einsum(equation = var_9436_equation_0, values = (var_9002_cast_fp16, var_9346_cast_fp16))[name = tensor("op_9436_cast_fp16")]; tensor var_9438_equation_0 = const()[name = tensor("op_9438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9438_cast_fp16 = einsum(equation = var_9438_equation_0, values = (var_9006_cast_fp16, var_9347_cast_fp16))[name = tensor("op_9438_cast_fp16")]; tensor var_9440_equation_0 = const()[name = tensor("op_9440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9440_cast_fp16 = einsum(equation = var_9440_equation_0, values = (var_9006_cast_fp16, var_9348_cast_fp16))[name = tensor("op_9440_cast_fp16")]; tensor var_9442_equation_0 = const()[name = tensor("op_9442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9442_cast_fp16 = einsum(equation = var_9442_equation_0, values = (var_9006_cast_fp16, var_9349_cast_fp16))[name = tensor("op_9442_cast_fp16")]; tensor var_9444_equation_0 = const()[name = tensor("op_9444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9444_cast_fp16 = einsum(equation = var_9444_equation_0, values = (var_9006_cast_fp16, var_9350_cast_fp16))[name = tensor("op_9444_cast_fp16")]; tensor var_9446_equation_0 = const()[name = tensor("op_9446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9446_cast_fp16 = einsum(equation = var_9446_equation_0, values = (var_9006_cast_fp16, var_9351_cast_fp16))[name = tensor("op_9446_cast_fp16")]; tensor var_9448_equation_0 = const()[name = tensor("op_9448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9448_cast_fp16 = einsum(equation = var_9448_equation_0, values = (var_9006_cast_fp16, var_9352_cast_fp16))[name = tensor("op_9448_cast_fp16")]; tensor var_9450_equation_0 = const()[name = tensor("op_9450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9450_cast_fp16 = einsum(equation = var_9450_equation_0, values = (var_9010_cast_fp16, var_9353_cast_fp16))[name = tensor("op_9450_cast_fp16")]; tensor var_9452_equation_0 = const()[name = tensor("op_9452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9452_cast_fp16 = einsum(equation = var_9452_equation_0, values = (var_9010_cast_fp16, var_9354_cast_fp16))[name = tensor("op_9452_cast_fp16")]; tensor var_9454_equation_0 = const()[name = tensor("op_9454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9454_cast_fp16 = einsum(equation = var_9454_equation_0, values = (var_9010_cast_fp16, var_9355_cast_fp16))[name = tensor("op_9454_cast_fp16")]; tensor var_9456_equation_0 = const()[name = tensor("op_9456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9456_cast_fp16 = einsum(equation = var_9456_equation_0, values = (var_9010_cast_fp16, var_9356_cast_fp16))[name = tensor("op_9456_cast_fp16")]; tensor var_9458_equation_0 = const()[name = tensor("op_9458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9458_cast_fp16 = einsum(equation = var_9458_equation_0, values = (var_9010_cast_fp16, var_9357_cast_fp16))[name = tensor("op_9458_cast_fp16")]; tensor var_9460_equation_0 = const()[name = tensor("op_9460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9460_cast_fp16 = einsum(equation = var_9460_equation_0, values = (var_9010_cast_fp16, var_9358_cast_fp16))[name = tensor("op_9460_cast_fp16")]; tensor var_9462_equation_0 = const()[name = tensor("op_9462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9462_cast_fp16 = einsum(equation = var_9462_equation_0, values = (var_9014_cast_fp16, var_9359_cast_fp16))[name = tensor("op_9462_cast_fp16")]; tensor var_9464_equation_0 = const()[name = tensor("op_9464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9464_cast_fp16 = einsum(equation = var_9464_equation_0, values = (var_9014_cast_fp16, var_9360_cast_fp16))[name = tensor("op_9464_cast_fp16")]; tensor var_9466_equation_0 = const()[name = tensor("op_9466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9466_cast_fp16 = einsum(equation = var_9466_equation_0, values = (var_9014_cast_fp16, var_9361_cast_fp16))[name = tensor("op_9466_cast_fp16")]; tensor var_9468_equation_0 = const()[name = tensor("op_9468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9468_cast_fp16 = einsum(equation = var_9468_equation_0, values = (var_9014_cast_fp16, var_9362_cast_fp16))[name = tensor("op_9468_cast_fp16")]; tensor var_9470_equation_0 = const()[name = tensor("op_9470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9470_cast_fp16 = einsum(equation = var_9470_equation_0, values = (var_9014_cast_fp16, var_9363_cast_fp16))[name = tensor("op_9470_cast_fp16")]; tensor var_9472_equation_0 = const()[name = tensor("op_9472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9472_cast_fp16 = einsum(equation = var_9472_equation_0, values = (var_9014_cast_fp16, var_9364_cast_fp16))[name = tensor("op_9472_cast_fp16")]; tensor var_9474_equation_0 = const()[name = tensor("op_9474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9474_cast_fp16 = einsum(equation = var_9474_equation_0, values = (var_9018_cast_fp16, var_9365_cast_fp16))[name = tensor("op_9474_cast_fp16")]; tensor var_9476_equation_0 = const()[name = tensor("op_9476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9476_cast_fp16 = einsum(equation = var_9476_equation_0, values = (var_9018_cast_fp16, var_9366_cast_fp16))[name = tensor("op_9476_cast_fp16")]; tensor var_9478_equation_0 = const()[name = tensor("op_9478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9478_cast_fp16 = einsum(equation = var_9478_equation_0, values = (var_9018_cast_fp16, var_9367_cast_fp16))[name = tensor("op_9478_cast_fp16")]; tensor var_9480_equation_0 = const()[name = tensor("op_9480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9480_cast_fp16 = einsum(equation = var_9480_equation_0, values = (var_9018_cast_fp16, var_9368_cast_fp16))[name = tensor("op_9480_cast_fp16")]; tensor var_9482_equation_0 = const()[name = tensor("op_9482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9482_cast_fp16 = einsum(equation = var_9482_equation_0, values = (var_9018_cast_fp16, var_9369_cast_fp16))[name = tensor("op_9482_cast_fp16")]; tensor var_9484_equation_0 = const()[name = tensor("op_9484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9484_cast_fp16 = einsum(equation = var_9484_equation_0, values = (var_9018_cast_fp16, var_9370_cast_fp16))[name = tensor("op_9484_cast_fp16")]; tensor var_9486_equation_0 = const()[name = tensor("op_9486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9486_cast_fp16 = einsum(equation = var_9486_equation_0, values = (var_9022_cast_fp16, var_9371_cast_fp16))[name = tensor("op_9486_cast_fp16")]; tensor var_9488_equation_0 = const()[name = tensor("op_9488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9488_cast_fp16 = einsum(equation = var_9488_equation_0, values = (var_9022_cast_fp16, var_9372_cast_fp16))[name = tensor("op_9488_cast_fp16")]; tensor var_9490_equation_0 = const()[name = tensor("op_9490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9490_cast_fp16 = einsum(equation = var_9490_equation_0, values = (var_9022_cast_fp16, var_9373_cast_fp16))[name = tensor("op_9490_cast_fp16")]; tensor var_9492_equation_0 = const()[name = tensor("op_9492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9492_cast_fp16 = einsum(equation = var_9492_equation_0, values = (var_9022_cast_fp16, var_9374_cast_fp16))[name = tensor("op_9492_cast_fp16")]; tensor var_9494_equation_0 = const()[name = tensor("op_9494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9494_cast_fp16 = einsum(equation = var_9494_equation_0, values = (var_9022_cast_fp16, var_9375_cast_fp16))[name = tensor("op_9494_cast_fp16")]; tensor var_9496_equation_0 = const()[name = tensor("op_9496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9496_cast_fp16 = einsum(equation = var_9496_equation_0, values = (var_9022_cast_fp16, var_9376_cast_fp16))[name = tensor("op_9496_cast_fp16")]; tensor var_9498_equation_0 = const()[name = tensor("op_9498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9498_cast_fp16 = einsum(equation = var_9498_equation_0, values = (var_9026_cast_fp16, var_9377_cast_fp16))[name = tensor("op_9498_cast_fp16")]; tensor var_9500_equation_0 = const()[name = tensor("op_9500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9500_cast_fp16 = einsum(equation = var_9500_equation_0, values = (var_9026_cast_fp16, var_9378_cast_fp16))[name = tensor("op_9500_cast_fp16")]; tensor var_9502_equation_0 = const()[name = tensor("op_9502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9502_cast_fp16 = einsum(equation = var_9502_equation_0, values = (var_9026_cast_fp16, var_9379_cast_fp16))[name = tensor("op_9502_cast_fp16")]; tensor var_9504_equation_0 = const()[name = tensor("op_9504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9504_cast_fp16 = einsum(equation = var_9504_equation_0, values = (var_9026_cast_fp16, var_9380_cast_fp16))[name = tensor("op_9504_cast_fp16")]; tensor var_9506_equation_0 = const()[name = tensor("op_9506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9506_cast_fp16 = einsum(equation = var_9506_equation_0, values = (var_9026_cast_fp16, var_9381_cast_fp16))[name = tensor("op_9506_cast_fp16")]; tensor var_9508_equation_0 = const()[name = tensor("op_9508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9508_cast_fp16 = einsum(equation = var_9508_equation_0, values = (var_9026_cast_fp16, var_9382_cast_fp16))[name = tensor("op_9508_cast_fp16")]; tensor var_9510_equation_0 = const()[name = tensor("op_9510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9510_cast_fp16 = einsum(equation = var_9510_equation_0, values = (var_9030_cast_fp16, var_9383_cast_fp16))[name = tensor("op_9510_cast_fp16")]; tensor var_9512_equation_0 = const()[name = tensor("op_9512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9512_cast_fp16 = einsum(equation = var_9512_equation_0, values = (var_9030_cast_fp16, var_9384_cast_fp16))[name = tensor("op_9512_cast_fp16")]; tensor var_9514_equation_0 = const()[name = tensor("op_9514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9514_cast_fp16 = einsum(equation = var_9514_equation_0, values = (var_9030_cast_fp16, var_9385_cast_fp16))[name = tensor("op_9514_cast_fp16")]; tensor var_9516_equation_0 = const()[name = tensor("op_9516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9516_cast_fp16 = einsum(equation = var_9516_equation_0, values = (var_9030_cast_fp16, var_9386_cast_fp16))[name = tensor("op_9516_cast_fp16")]; tensor var_9518_equation_0 = const()[name = tensor("op_9518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9518_cast_fp16 = einsum(equation = var_9518_equation_0, values = (var_9030_cast_fp16, var_9387_cast_fp16))[name = tensor("op_9518_cast_fp16")]; tensor var_9520_equation_0 = const()[name = tensor("op_9520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9520_cast_fp16 = einsum(equation = var_9520_equation_0, values = (var_9030_cast_fp16, var_9388_cast_fp16))[name = tensor("op_9520_cast_fp16")]; tensor var_9522_equation_0 = const()[name = tensor("op_9522_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9522_cast_fp16 = einsum(equation = var_9522_equation_0, values = (var_9034_cast_fp16, var_9389_cast_fp16))[name = tensor("op_9522_cast_fp16")]; tensor var_9524_equation_0 = const()[name = tensor("op_9524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9524_cast_fp16 = einsum(equation = var_9524_equation_0, values = (var_9034_cast_fp16, var_9390_cast_fp16))[name = tensor("op_9524_cast_fp16")]; tensor var_9526_equation_0 = const()[name = tensor("op_9526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9526_cast_fp16 = einsum(equation = var_9526_equation_0, values = (var_9034_cast_fp16, var_9391_cast_fp16))[name = tensor("op_9526_cast_fp16")]; tensor var_9528_equation_0 = const()[name = tensor("op_9528_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9528_cast_fp16 = einsum(equation = var_9528_equation_0, values = (var_9034_cast_fp16, var_9392_cast_fp16))[name = tensor("op_9528_cast_fp16")]; tensor var_9530_equation_0 = const()[name = tensor("op_9530_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9530_cast_fp16 = einsum(equation = var_9530_equation_0, values = (var_9034_cast_fp16, var_9393_cast_fp16))[name = tensor("op_9530_cast_fp16")]; tensor var_9532_equation_0 = const()[name = tensor("op_9532_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9532_cast_fp16 = einsum(equation = var_9532_equation_0, values = (var_9034_cast_fp16, var_9394_cast_fp16))[name = tensor("op_9532_cast_fp16")]; tensor var_9534_equation_0 = const()[name = tensor("op_9534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9534_cast_fp16 = einsum(equation = var_9534_equation_0, values = (var_9038_cast_fp16, var_9395_cast_fp16))[name = tensor("op_9534_cast_fp16")]; tensor var_9536_equation_0 = const()[name = tensor("op_9536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9536_cast_fp16 = einsum(equation = var_9536_equation_0, values = (var_9038_cast_fp16, var_9396_cast_fp16))[name = tensor("op_9536_cast_fp16")]; tensor var_9538_equation_0 = const()[name = tensor("op_9538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9538_cast_fp16 = einsum(equation = var_9538_equation_0, values = (var_9038_cast_fp16, var_9397_cast_fp16))[name = tensor("op_9538_cast_fp16")]; tensor var_9540_equation_0 = const()[name = tensor("op_9540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9540_cast_fp16 = einsum(equation = var_9540_equation_0, values = (var_9038_cast_fp16, var_9398_cast_fp16))[name = tensor("op_9540_cast_fp16")]; tensor var_9542_equation_0 = const()[name = tensor("op_9542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9542_cast_fp16 = einsum(equation = var_9542_equation_0, values = (var_9038_cast_fp16, var_9399_cast_fp16))[name = tensor("op_9542_cast_fp16")]; tensor var_9544_equation_0 = const()[name = tensor("op_9544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9544_cast_fp16 = einsum(equation = var_9544_equation_0, values = (var_9038_cast_fp16, var_9400_cast_fp16))[name = tensor("op_9544_cast_fp16")]; tensor var_9546_interleave_0 = const()[name = tensor("op_9546_interleave_0"), val = tensor(false)]; tensor var_9546_cast_fp16 = concat(axis = var_8757, interleave = var_9546_interleave_0, values = (var_9402_cast_fp16, var_9404_cast_fp16, var_9406_cast_fp16, var_9408_cast_fp16, var_9410_cast_fp16, var_9412_cast_fp16))[name = tensor("op_9546_cast_fp16")]; tensor var_9548_interleave_0 = const()[name = tensor("op_9548_interleave_0"), val = tensor(false)]; tensor var_9548_cast_fp16 = concat(axis = var_8757, interleave = var_9548_interleave_0, values = (var_9414_cast_fp16, var_9416_cast_fp16, var_9418_cast_fp16, var_9420_cast_fp16, var_9422_cast_fp16, var_9424_cast_fp16))[name = tensor("op_9548_cast_fp16")]; tensor var_9550_interleave_0 = const()[name = tensor("op_9550_interleave_0"), val = tensor(false)]; tensor var_9550_cast_fp16 = concat(axis = var_8757, interleave = var_9550_interleave_0, values = (var_9426_cast_fp16, var_9428_cast_fp16, var_9430_cast_fp16, var_9432_cast_fp16, var_9434_cast_fp16, var_9436_cast_fp16))[name = tensor("op_9550_cast_fp16")]; tensor var_9552_interleave_0 = const()[name = tensor("op_9552_interleave_0"), val = tensor(false)]; tensor var_9552_cast_fp16 = concat(axis = var_8757, interleave = var_9552_interleave_0, values = (var_9438_cast_fp16, var_9440_cast_fp16, var_9442_cast_fp16, var_9444_cast_fp16, var_9446_cast_fp16, var_9448_cast_fp16))[name = tensor("op_9552_cast_fp16")]; tensor var_9554_interleave_0 = const()[name = tensor("op_9554_interleave_0"), val = tensor(false)]; tensor var_9554_cast_fp16 = concat(axis = var_8757, interleave = var_9554_interleave_0, values = (var_9450_cast_fp16, var_9452_cast_fp16, var_9454_cast_fp16, var_9456_cast_fp16, var_9458_cast_fp16, var_9460_cast_fp16))[name = tensor("op_9554_cast_fp16")]; tensor var_9556_interleave_0 = const()[name = tensor("op_9556_interleave_0"), val = tensor(false)]; tensor var_9556_cast_fp16 = concat(axis = var_8757, interleave = var_9556_interleave_0, values = (var_9462_cast_fp16, var_9464_cast_fp16, var_9466_cast_fp16, var_9468_cast_fp16, var_9470_cast_fp16, var_9472_cast_fp16))[name = tensor("op_9556_cast_fp16")]; tensor var_9558_interleave_0 = const()[name = tensor("op_9558_interleave_0"), val = tensor(false)]; tensor var_9558_cast_fp16 = concat(axis = var_8757, interleave = var_9558_interleave_0, values = (var_9474_cast_fp16, var_9476_cast_fp16, var_9478_cast_fp16, var_9480_cast_fp16, var_9482_cast_fp16, var_9484_cast_fp16))[name = tensor("op_9558_cast_fp16")]; tensor var_9560_interleave_0 = const()[name = tensor("op_9560_interleave_0"), val = tensor(false)]; tensor var_9560_cast_fp16 = concat(axis = var_8757, interleave = var_9560_interleave_0, values = (var_9486_cast_fp16, var_9488_cast_fp16, var_9490_cast_fp16, var_9492_cast_fp16, var_9494_cast_fp16, var_9496_cast_fp16))[name = tensor("op_9560_cast_fp16")]; tensor var_9562_interleave_0 = const()[name = tensor("op_9562_interleave_0"), val = tensor(false)]; tensor var_9562_cast_fp16 = concat(axis = var_8757, interleave = var_9562_interleave_0, values = (var_9498_cast_fp16, var_9500_cast_fp16, var_9502_cast_fp16, var_9504_cast_fp16, var_9506_cast_fp16, var_9508_cast_fp16))[name = tensor("op_9562_cast_fp16")]; tensor var_9564_interleave_0 = const()[name = tensor("op_9564_interleave_0"), val = tensor(false)]; tensor var_9564_cast_fp16 = concat(axis = var_8757, interleave = var_9564_interleave_0, values = (var_9510_cast_fp16, var_9512_cast_fp16, var_9514_cast_fp16, var_9516_cast_fp16, var_9518_cast_fp16, var_9520_cast_fp16))[name = tensor("op_9564_cast_fp16")]; tensor var_9566_interleave_0 = const()[name = tensor("op_9566_interleave_0"), val = tensor(false)]; tensor var_9566_cast_fp16 = concat(axis = var_8757, interleave = var_9566_interleave_0, values = (var_9522_cast_fp16, var_9524_cast_fp16, var_9526_cast_fp16, var_9528_cast_fp16, var_9530_cast_fp16, var_9532_cast_fp16))[name = tensor("op_9566_cast_fp16")]; tensor var_9568_interleave_0 = const()[name = tensor("op_9568_interleave_0"), val = tensor(false)]; tensor var_9568_cast_fp16 = concat(axis = var_8757, interleave = var_9568_interleave_0, values = (var_9534_cast_fp16, var_9536_cast_fp16, var_9538_cast_fp16, var_9540_cast_fp16, var_9542_cast_fp16, var_9544_cast_fp16))[name = tensor("op_9568_cast_fp16")]; tensor input_81_interleave_0 = const()[name = tensor("input_81_interleave_0"), val = tensor(false)]; tensor input_81_cast_fp16 = concat(axis = var_8773, interleave = input_81_interleave_0, values = (var_9546_cast_fp16, var_9548_cast_fp16, var_9550_cast_fp16, var_9552_cast_fp16, var_9554_cast_fp16, var_9556_cast_fp16, var_9558_cast_fp16, var_9560_cast_fp16, var_9562_cast_fp16, var_9564_cast_fp16, var_9566_cast_fp16, var_9568_cast_fp16))[name = tensor("input_81_cast_fp16")]; tensor obj_43_pad_type_0 = const()[name = tensor("obj_43_pad_type_0"), val = tensor("valid")]; tensor obj_43_strides_0 = const()[name = tensor("obj_43_strides_0"), val = tensor([1, 1])]; tensor obj_43_pad_0 = const()[name = tensor("obj_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_43_dilations_0 = const()[name = tensor("obj_43_dilations_0"), val = tensor([1, 1])]; tensor obj_43_groups_0 = const()[name = tensor("obj_43_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151515456)))]; tensor layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152695168)))]; tensor obj_43_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = obj_43_dilations_0, groups = obj_43_groups_0, pad = obj_43_pad_0, pad_type = obj_43_pad_type_0, strides = obj_43_strides_0, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("obj_43_cast_fp16")]; tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; tensor var_9587_to_fp16 = const()[name = tensor("op_9587_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_9587_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152696768)))]; tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152698368)))]; tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor input_85_pad_type_0 = const()[name = tensor("input_85_pad_type_0"), val = tensor("valid")]; tensor input_85_strides_0 = const()[name = tensor("input_85_strides_0"), val = tensor([1, 1])]; tensor input_85_pad_0 = const()[name = tensor("input_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_85_dilations_0 = const()[name = tensor("input_85_dilations_0"), val = tensor([1, 1])]; tensor input_85_groups_0 = const()[name = tensor("input_85_groups_0"), val = tensor(1)]; tensor layers_10_fc1_weight_to_fp16 = const()[name = tensor("layers_10_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152699968)))]; tensor layers_10_fc1_bias_to_fp16 = const()[name = tensor("layers_10_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157418624)))]; tensor input_85_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = input_85_dilations_0, groups = input_85_groups_0, pad = input_85_pad_0, pad_type = input_85_pad_type_0, strides = input_85_strides_0, weight = layers_10_fc1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("input_85_cast_fp16")]; tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("valid")]; tensor hidden_states_25_strides_0 = const()[name = tensor("hidden_states_25_strides_0"), val = tensor([1, 1])]; tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_25_dilations_0 = const()[name = tensor("hidden_states_25_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_25_groups_0 = const()[name = tensor("hidden_states_25_groups_0"), val = tensor(1)]; tensor layers_10_fc2_weight_to_fp16 = const()[name = tensor("layers_10_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157424832)))]; tensor layers_10_fc2_bias_to_fp16 = const()[name = tensor("layers_10_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162143488)))]; tensor hidden_states_25_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = hidden_states_25_dilations_0, groups = hidden_states_25_groups_0, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = hidden_states_25_strides_0, weight = layers_10_fc2_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; tensor var_9619 = const()[name = tensor("op_9619"), val = tensor(3)]; tensor var_9635 = const()[name = tensor("op_9635"), val = tensor(1)]; tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; tensor var_9652_to_fp16 = const()[name = tensor("op_9652_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_9652_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162145088)))]; tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162146688)))]; tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("valid")]; tensor query_strides_0 = const()[name = tensor("query_strides_0"), val = tensor([1, 1])]; tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_dilations_0 = const()[name = tensor("query_dilations_0"), val = tensor([1, 1])]; tensor query_groups_0 = const()[name = tensor("query_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162148288)))]; tensor layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163328000)))]; tensor query_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("query_cast_fp16")]; tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("valid")]; tensor key_strides_0 = const()[name = tensor("key_strides_0"), val = tensor([1, 1])]; tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_dilations_0 = const()[name = tensor("key_dilations_0"), val = tensor([1, 1])]; tensor key_groups_0 = const()[name = tensor("key_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163329600)))]; tensor key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("key_cast_fp16")]; tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("valid")]; tensor value_strides_0 = const()[name = tensor("value_strides_0"), val = tensor([1, 1])]; tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_dilations_0 = const()[name = tensor("value_dilations_0"), val = tensor([1, 1])]; tensor value_groups_0 = const()[name = tensor("value_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164509312)))]; tensor layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165689024)))]; tensor value_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("value_cast_fp16")]; tensor var_9687_begin_0 = const()[name = tensor("op_9687_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9687_end_0 = const()[name = tensor("op_9687_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_9687_end_mask_0 = const()[name = tensor("op_9687_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9687_cast_fp16 = slice_by_index(begin = var_9687_begin_0, end = var_9687_end_0, end_mask = var_9687_end_mask_0, x = query_cast_fp16)[name = tensor("op_9687_cast_fp16")]; tensor var_9691_begin_0 = const()[name = tensor("op_9691_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_9691_end_0 = const()[name = tensor("op_9691_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_9691_end_mask_0 = const()[name = tensor("op_9691_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9691_cast_fp16 = slice_by_index(begin = var_9691_begin_0, end = var_9691_end_0, end_mask = var_9691_end_mask_0, x = query_cast_fp16)[name = tensor("op_9691_cast_fp16")]; tensor var_9695_begin_0 = const()[name = tensor("op_9695_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_9695_end_0 = const()[name = tensor("op_9695_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_9695_end_mask_0 = const()[name = tensor("op_9695_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9695_cast_fp16 = slice_by_index(begin = var_9695_begin_0, end = var_9695_end_0, end_mask = var_9695_end_mask_0, x = query_cast_fp16)[name = tensor("op_9695_cast_fp16")]; tensor var_9699_begin_0 = const()[name = tensor("op_9699_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_9699_end_0 = const()[name = tensor("op_9699_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_9699_end_mask_0 = const()[name = tensor("op_9699_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9699_cast_fp16 = slice_by_index(begin = var_9699_begin_0, end = var_9699_end_0, end_mask = var_9699_end_mask_0, x = query_cast_fp16)[name = tensor("op_9699_cast_fp16")]; tensor var_9703_begin_0 = const()[name = tensor("op_9703_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_9703_end_0 = const()[name = tensor("op_9703_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_9703_end_mask_0 = const()[name = tensor("op_9703_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9703_cast_fp16 = slice_by_index(begin = var_9703_begin_0, end = var_9703_end_0, end_mask = var_9703_end_mask_0, x = query_cast_fp16)[name = tensor("op_9703_cast_fp16")]; tensor var_9707_begin_0 = const()[name = tensor("op_9707_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9707_end_0 = const()[name = tensor("op_9707_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_9707_end_mask_0 = const()[name = tensor("op_9707_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9707_cast_fp16 = slice_by_index(begin = var_9707_begin_0, end = var_9707_end_0, end_mask = var_9707_end_mask_0, x = query_cast_fp16)[name = tensor("op_9707_cast_fp16")]; tensor var_9711_begin_0 = const()[name = tensor("op_9711_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_9711_end_0 = const()[name = tensor("op_9711_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_9711_end_mask_0 = const()[name = tensor("op_9711_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9711_cast_fp16 = slice_by_index(begin = var_9711_begin_0, end = var_9711_end_0, end_mask = var_9711_end_mask_0, x = query_cast_fp16)[name = tensor("op_9711_cast_fp16")]; tensor var_9715_begin_0 = const()[name = tensor("op_9715_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_9715_end_0 = const()[name = tensor("op_9715_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_9715_end_mask_0 = const()[name = tensor("op_9715_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9715_cast_fp16 = slice_by_index(begin = var_9715_begin_0, end = var_9715_end_0, end_mask = var_9715_end_mask_0, x = query_cast_fp16)[name = tensor("op_9715_cast_fp16")]; tensor var_9719_begin_0 = const()[name = tensor("op_9719_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_9719_end_0 = const()[name = tensor("op_9719_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_9719_end_mask_0 = const()[name = tensor("op_9719_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9719_cast_fp16 = slice_by_index(begin = var_9719_begin_0, end = var_9719_end_0, end_mask = var_9719_end_mask_0, x = query_cast_fp16)[name = tensor("op_9719_cast_fp16")]; tensor var_9723_begin_0 = const()[name = tensor("op_9723_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_9723_end_0 = const()[name = tensor("op_9723_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_9723_end_mask_0 = const()[name = tensor("op_9723_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9723_cast_fp16 = slice_by_index(begin = var_9723_begin_0, end = var_9723_end_0, end_mask = var_9723_end_mask_0, x = query_cast_fp16)[name = tensor("op_9723_cast_fp16")]; tensor var_9727_begin_0 = const()[name = tensor("op_9727_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_9727_end_0 = const()[name = tensor("op_9727_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_9727_end_mask_0 = const()[name = tensor("op_9727_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9727_cast_fp16 = slice_by_index(begin = var_9727_begin_0, end = var_9727_end_0, end_mask = var_9727_end_mask_0, x = query_cast_fp16)[name = tensor("op_9727_cast_fp16")]; tensor var_9731_begin_0 = const()[name = tensor("op_9731_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_9731_end_0 = const()[name = tensor("op_9731_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_9731_end_mask_0 = const()[name = tensor("op_9731_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9731_cast_fp16 = slice_by_index(begin = var_9731_begin_0, end = var_9731_end_0, end_mask = var_9731_end_mask_0, x = query_cast_fp16)[name = tensor("op_9731_cast_fp16")]; tensor var_9734_begin_0 = const()[name = tensor("op_9734_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9734_end_0 = const()[name = tensor("op_9734_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9734_end_mask_0 = const()[name = tensor("op_9734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9734_cast_fp16 = slice_by_index(begin = var_9734_begin_0, end = var_9734_end_0, end_mask = var_9734_end_mask_0, x = var_9687_cast_fp16)[name = tensor("op_9734_cast_fp16")]; tensor var_9735_begin_0 = const()[name = tensor("op_9735_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9735_end_0 = const()[name = tensor("op_9735_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9735_end_mask_0 = const()[name = tensor("op_9735_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9735_cast_fp16 = slice_by_index(begin = var_9735_begin_0, end = var_9735_end_0, end_mask = var_9735_end_mask_0, x = var_9687_cast_fp16)[name = tensor("op_9735_cast_fp16")]; tensor var_9736_begin_0 = const()[name = tensor("op_9736_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9736_end_0 = const()[name = tensor("op_9736_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9736_end_mask_0 = const()[name = tensor("op_9736_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9736_cast_fp16 = slice_by_index(begin = var_9736_begin_0, end = var_9736_end_0, end_mask = var_9736_end_mask_0, x = var_9687_cast_fp16)[name = tensor("op_9736_cast_fp16")]; tensor var_9737_begin_0 = const()[name = tensor("op_9737_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9737_end_0 = const()[name = tensor("op_9737_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9737_end_mask_0 = const()[name = tensor("op_9737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9737_cast_fp16 = slice_by_index(begin = var_9737_begin_0, end = var_9737_end_0, end_mask = var_9737_end_mask_0, x = var_9687_cast_fp16)[name = tensor("op_9737_cast_fp16")]; tensor var_9738_begin_0 = const()[name = tensor("op_9738_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9738_end_0 = const()[name = tensor("op_9738_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9738_end_mask_0 = const()[name = tensor("op_9738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9738_cast_fp16 = slice_by_index(begin = var_9738_begin_0, end = var_9738_end_0, end_mask = var_9738_end_mask_0, x = var_9687_cast_fp16)[name = tensor("op_9738_cast_fp16")]; tensor var_9739_begin_0 = const()[name = tensor("op_9739_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9739_end_0 = const()[name = tensor("op_9739_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9739_end_mask_0 = const()[name = tensor("op_9739_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9739_cast_fp16 = slice_by_index(begin = var_9739_begin_0, end = var_9739_end_0, end_mask = var_9739_end_mask_0, x = var_9687_cast_fp16)[name = tensor("op_9739_cast_fp16")]; tensor var_9740_begin_0 = const()[name = tensor("op_9740_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9740_end_0 = const()[name = tensor("op_9740_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9740_end_mask_0 = const()[name = tensor("op_9740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9740_cast_fp16 = slice_by_index(begin = var_9740_begin_0, end = var_9740_end_0, end_mask = var_9740_end_mask_0, x = var_9691_cast_fp16)[name = tensor("op_9740_cast_fp16")]; tensor var_9741_begin_0 = const()[name = tensor("op_9741_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9741_end_0 = const()[name = tensor("op_9741_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9741_end_mask_0 = const()[name = tensor("op_9741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9741_cast_fp16 = slice_by_index(begin = var_9741_begin_0, end = var_9741_end_0, end_mask = var_9741_end_mask_0, x = var_9691_cast_fp16)[name = tensor("op_9741_cast_fp16")]; tensor var_9742_begin_0 = const()[name = tensor("op_9742_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9742_end_0 = const()[name = tensor("op_9742_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9742_end_mask_0 = const()[name = tensor("op_9742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9742_cast_fp16 = slice_by_index(begin = var_9742_begin_0, end = var_9742_end_0, end_mask = var_9742_end_mask_0, x = var_9691_cast_fp16)[name = tensor("op_9742_cast_fp16")]; tensor var_9743_begin_0 = const()[name = tensor("op_9743_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9743_end_0 = const()[name = tensor("op_9743_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9743_end_mask_0 = const()[name = tensor("op_9743_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9743_cast_fp16 = slice_by_index(begin = var_9743_begin_0, end = var_9743_end_0, end_mask = var_9743_end_mask_0, x = var_9691_cast_fp16)[name = tensor("op_9743_cast_fp16")]; tensor var_9744_begin_0 = const()[name = tensor("op_9744_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9744_end_0 = const()[name = tensor("op_9744_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9744_end_mask_0 = const()[name = tensor("op_9744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9744_cast_fp16 = slice_by_index(begin = var_9744_begin_0, end = var_9744_end_0, end_mask = var_9744_end_mask_0, x = var_9691_cast_fp16)[name = tensor("op_9744_cast_fp16")]; tensor var_9745_begin_0 = const()[name = tensor("op_9745_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9745_end_0 = const()[name = tensor("op_9745_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9745_end_mask_0 = const()[name = tensor("op_9745_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9745_cast_fp16 = slice_by_index(begin = var_9745_begin_0, end = var_9745_end_0, end_mask = var_9745_end_mask_0, x = var_9691_cast_fp16)[name = tensor("op_9745_cast_fp16")]; tensor var_9746_begin_0 = const()[name = tensor("op_9746_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9746_end_0 = const()[name = tensor("op_9746_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9746_end_mask_0 = const()[name = tensor("op_9746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9746_cast_fp16 = slice_by_index(begin = var_9746_begin_0, end = var_9746_end_0, end_mask = var_9746_end_mask_0, x = var_9695_cast_fp16)[name = tensor("op_9746_cast_fp16")]; tensor var_9747_begin_0 = const()[name = tensor("op_9747_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9747_end_0 = const()[name = tensor("op_9747_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9747_end_mask_0 = const()[name = tensor("op_9747_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9747_cast_fp16 = slice_by_index(begin = var_9747_begin_0, end = var_9747_end_0, end_mask = var_9747_end_mask_0, x = var_9695_cast_fp16)[name = tensor("op_9747_cast_fp16")]; tensor var_9748_begin_0 = const()[name = tensor("op_9748_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9748_end_0 = const()[name = tensor("op_9748_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9748_end_mask_0 = const()[name = tensor("op_9748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9748_cast_fp16 = slice_by_index(begin = var_9748_begin_0, end = var_9748_end_0, end_mask = var_9748_end_mask_0, x = var_9695_cast_fp16)[name = tensor("op_9748_cast_fp16")]; tensor var_9749_begin_0 = const()[name = tensor("op_9749_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9749_end_0 = const()[name = tensor("op_9749_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9749_end_mask_0 = const()[name = tensor("op_9749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9749_cast_fp16 = slice_by_index(begin = var_9749_begin_0, end = var_9749_end_0, end_mask = var_9749_end_mask_0, x = var_9695_cast_fp16)[name = tensor("op_9749_cast_fp16")]; tensor var_9750_begin_0 = const()[name = tensor("op_9750_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9750_end_0 = const()[name = tensor("op_9750_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9750_end_mask_0 = const()[name = tensor("op_9750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9750_cast_fp16 = slice_by_index(begin = var_9750_begin_0, end = var_9750_end_0, end_mask = var_9750_end_mask_0, x = var_9695_cast_fp16)[name = tensor("op_9750_cast_fp16")]; tensor var_9751_begin_0 = const()[name = tensor("op_9751_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9751_end_0 = const()[name = tensor("op_9751_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9751_end_mask_0 = const()[name = tensor("op_9751_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9751_cast_fp16 = slice_by_index(begin = var_9751_begin_0, end = var_9751_end_0, end_mask = var_9751_end_mask_0, x = var_9695_cast_fp16)[name = tensor("op_9751_cast_fp16")]; tensor var_9752_begin_0 = const()[name = tensor("op_9752_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9752_end_0 = const()[name = tensor("op_9752_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9752_end_mask_0 = const()[name = tensor("op_9752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9752_cast_fp16 = slice_by_index(begin = var_9752_begin_0, end = var_9752_end_0, end_mask = var_9752_end_mask_0, x = var_9699_cast_fp16)[name = tensor("op_9752_cast_fp16")]; tensor var_9753_begin_0 = const()[name = tensor("op_9753_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9753_end_0 = const()[name = tensor("op_9753_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9753_end_mask_0 = const()[name = tensor("op_9753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9753_cast_fp16 = slice_by_index(begin = var_9753_begin_0, end = var_9753_end_0, end_mask = var_9753_end_mask_0, x = var_9699_cast_fp16)[name = tensor("op_9753_cast_fp16")]; tensor var_9754_begin_0 = const()[name = tensor("op_9754_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9754_end_0 = const()[name = tensor("op_9754_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9754_end_mask_0 = const()[name = tensor("op_9754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9754_cast_fp16 = slice_by_index(begin = var_9754_begin_0, end = var_9754_end_0, end_mask = var_9754_end_mask_0, x = var_9699_cast_fp16)[name = tensor("op_9754_cast_fp16")]; tensor var_9755_begin_0 = const()[name = tensor("op_9755_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9755_end_0 = const()[name = tensor("op_9755_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9755_end_mask_0 = const()[name = tensor("op_9755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9755_cast_fp16 = slice_by_index(begin = var_9755_begin_0, end = var_9755_end_0, end_mask = var_9755_end_mask_0, x = var_9699_cast_fp16)[name = tensor("op_9755_cast_fp16")]; tensor var_9756_begin_0 = const()[name = tensor("op_9756_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9756_end_0 = const()[name = tensor("op_9756_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9756_end_mask_0 = const()[name = tensor("op_9756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9756_cast_fp16 = slice_by_index(begin = var_9756_begin_0, end = var_9756_end_0, end_mask = var_9756_end_mask_0, x = var_9699_cast_fp16)[name = tensor("op_9756_cast_fp16")]; tensor var_9757_begin_0 = const()[name = tensor("op_9757_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9757_end_0 = const()[name = tensor("op_9757_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9757_end_mask_0 = const()[name = tensor("op_9757_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9757_cast_fp16 = slice_by_index(begin = var_9757_begin_0, end = var_9757_end_0, end_mask = var_9757_end_mask_0, x = var_9699_cast_fp16)[name = tensor("op_9757_cast_fp16")]; tensor var_9758_begin_0 = const()[name = tensor("op_9758_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9758_end_0 = const()[name = tensor("op_9758_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9758_end_mask_0 = const()[name = tensor("op_9758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9758_cast_fp16 = slice_by_index(begin = var_9758_begin_0, end = var_9758_end_0, end_mask = var_9758_end_mask_0, x = var_9703_cast_fp16)[name = tensor("op_9758_cast_fp16")]; tensor var_9759_begin_0 = const()[name = tensor("op_9759_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9759_end_0 = const()[name = tensor("op_9759_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9759_end_mask_0 = const()[name = tensor("op_9759_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9759_cast_fp16 = slice_by_index(begin = var_9759_begin_0, end = var_9759_end_0, end_mask = var_9759_end_mask_0, x = var_9703_cast_fp16)[name = tensor("op_9759_cast_fp16")]; tensor var_9760_begin_0 = const()[name = tensor("op_9760_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9760_end_0 = const()[name = tensor("op_9760_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9760_end_mask_0 = const()[name = tensor("op_9760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9760_cast_fp16 = slice_by_index(begin = var_9760_begin_0, end = var_9760_end_0, end_mask = var_9760_end_mask_0, x = var_9703_cast_fp16)[name = tensor("op_9760_cast_fp16")]; tensor var_9761_begin_0 = const()[name = tensor("op_9761_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9761_end_0 = const()[name = tensor("op_9761_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9761_end_mask_0 = const()[name = tensor("op_9761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9761_cast_fp16 = slice_by_index(begin = var_9761_begin_0, end = var_9761_end_0, end_mask = var_9761_end_mask_0, x = var_9703_cast_fp16)[name = tensor("op_9761_cast_fp16")]; tensor var_9762_begin_0 = const()[name = tensor("op_9762_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9762_end_0 = const()[name = tensor("op_9762_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9762_end_mask_0 = const()[name = tensor("op_9762_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9762_cast_fp16 = slice_by_index(begin = var_9762_begin_0, end = var_9762_end_0, end_mask = var_9762_end_mask_0, x = var_9703_cast_fp16)[name = tensor("op_9762_cast_fp16")]; tensor var_9763_begin_0 = const()[name = tensor("op_9763_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9763_end_0 = const()[name = tensor("op_9763_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9763_end_mask_0 = const()[name = tensor("op_9763_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9763_cast_fp16 = slice_by_index(begin = var_9763_begin_0, end = var_9763_end_0, end_mask = var_9763_end_mask_0, x = var_9703_cast_fp16)[name = tensor("op_9763_cast_fp16")]; tensor var_9764_begin_0 = const()[name = tensor("op_9764_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9764_end_0 = const()[name = tensor("op_9764_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9764_end_mask_0 = const()[name = tensor("op_9764_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9764_cast_fp16 = slice_by_index(begin = var_9764_begin_0, end = var_9764_end_0, end_mask = var_9764_end_mask_0, x = var_9707_cast_fp16)[name = tensor("op_9764_cast_fp16")]; tensor var_9765_begin_0 = const()[name = tensor("op_9765_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9765_end_0 = const()[name = tensor("op_9765_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9765_end_mask_0 = const()[name = tensor("op_9765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9765_cast_fp16 = slice_by_index(begin = var_9765_begin_0, end = var_9765_end_0, end_mask = var_9765_end_mask_0, x = var_9707_cast_fp16)[name = tensor("op_9765_cast_fp16")]; tensor var_9766_begin_0 = const()[name = tensor("op_9766_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9766_end_0 = const()[name = tensor("op_9766_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9766_end_mask_0 = const()[name = tensor("op_9766_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9766_cast_fp16 = slice_by_index(begin = var_9766_begin_0, end = var_9766_end_0, end_mask = var_9766_end_mask_0, x = var_9707_cast_fp16)[name = tensor("op_9766_cast_fp16")]; tensor var_9767_begin_0 = const()[name = tensor("op_9767_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9767_end_0 = const()[name = tensor("op_9767_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9767_end_mask_0 = const()[name = tensor("op_9767_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9767_cast_fp16 = slice_by_index(begin = var_9767_begin_0, end = var_9767_end_0, end_mask = var_9767_end_mask_0, x = var_9707_cast_fp16)[name = tensor("op_9767_cast_fp16")]; tensor var_9768_begin_0 = const()[name = tensor("op_9768_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9768_end_0 = const()[name = tensor("op_9768_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9768_end_mask_0 = const()[name = tensor("op_9768_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9768_cast_fp16 = slice_by_index(begin = var_9768_begin_0, end = var_9768_end_0, end_mask = var_9768_end_mask_0, x = var_9707_cast_fp16)[name = tensor("op_9768_cast_fp16")]; tensor var_9769_begin_0 = const()[name = tensor("op_9769_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9769_end_0 = const()[name = tensor("op_9769_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9769_end_mask_0 = const()[name = tensor("op_9769_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9769_cast_fp16 = slice_by_index(begin = var_9769_begin_0, end = var_9769_end_0, end_mask = var_9769_end_mask_0, x = var_9707_cast_fp16)[name = tensor("op_9769_cast_fp16")]; tensor var_9770_begin_0 = const()[name = tensor("op_9770_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9770_end_0 = const()[name = tensor("op_9770_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9770_end_mask_0 = const()[name = tensor("op_9770_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9770_cast_fp16 = slice_by_index(begin = var_9770_begin_0, end = var_9770_end_0, end_mask = var_9770_end_mask_0, x = var_9711_cast_fp16)[name = tensor("op_9770_cast_fp16")]; tensor var_9771_begin_0 = const()[name = tensor("op_9771_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9771_end_0 = const()[name = tensor("op_9771_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9771_end_mask_0 = const()[name = tensor("op_9771_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9771_cast_fp16 = slice_by_index(begin = var_9771_begin_0, end = var_9771_end_0, end_mask = var_9771_end_mask_0, x = var_9711_cast_fp16)[name = tensor("op_9771_cast_fp16")]; tensor var_9772_begin_0 = const()[name = tensor("op_9772_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9772_end_0 = const()[name = tensor("op_9772_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9772_end_mask_0 = const()[name = tensor("op_9772_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9772_cast_fp16 = slice_by_index(begin = var_9772_begin_0, end = var_9772_end_0, end_mask = var_9772_end_mask_0, x = var_9711_cast_fp16)[name = tensor("op_9772_cast_fp16")]; tensor var_9773_begin_0 = const()[name = tensor("op_9773_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9773_end_0 = const()[name = tensor("op_9773_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9773_end_mask_0 = const()[name = tensor("op_9773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9773_cast_fp16 = slice_by_index(begin = var_9773_begin_0, end = var_9773_end_0, end_mask = var_9773_end_mask_0, x = var_9711_cast_fp16)[name = tensor("op_9773_cast_fp16")]; tensor var_9774_begin_0 = const()[name = tensor("op_9774_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9774_end_0 = const()[name = tensor("op_9774_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9774_end_mask_0 = const()[name = tensor("op_9774_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9774_cast_fp16 = slice_by_index(begin = var_9774_begin_0, end = var_9774_end_0, end_mask = var_9774_end_mask_0, x = var_9711_cast_fp16)[name = tensor("op_9774_cast_fp16")]; tensor var_9775_begin_0 = const()[name = tensor("op_9775_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9775_end_0 = const()[name = tensor("op_9775_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9775_end_mask_0 = const()[name = tensor("op_9775_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9775_cast_fp16 = slice_by_index(begin = var_9775_begin_0, end = var_9775_end_0, end_mask = var_9775_end_mask_0, x = var_9711_cast_fp16)[name = tensor("op_9775_cast_fp16")]; tensor var_9776_begin_0 = const()[name = tensor("op_9776_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9776_end_0 = const()[name = tensor("op_9776_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9776_end_mask_0 = const()[name = tensor("op_9776_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9776_cast_fp16 = slice_by_index(begin = var_9776_begin_0, end = var_9776_end_0, end_mask = var_9776_end_mask_0, x = var_9715_cast_fp16)[name = tensor("op_9776_cast_fp16")]; tensor var_9777_begin_0 = const()[name = tensor("op_9777_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9777_end_0 = const()[name = tensor("op_9777_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9777_end_mask_0 = const()[name = tensor("op_9777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9777_cast_fp16 = slice_by_index(begin = var_9777_begin_0, end = var_9777_end_0, end_mask = var_9777_end_mask_0, x = var_9715_cast_fp16)[name = tensor("op_9777_cast_fp16")]; tensor var_9778_begin_0 = const()[name = tensor("op_9778_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9778_end_0 = const()[name = tensor("op_9778_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9778_end_mask_0 = const()[name = tensor("op_9778_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9778_cast_fp16 = slice_by_index(begin = var_9778_begin_0, end = var_9778_end_0, end_mask = var_9778_end_mask_0, x = var_9715_cast_fp16)[name = tensor("op_9778_cast_fp16")]; tensor var_9779_begin_0 = const()[name = tensor("op_9779_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9779_end_0 = const()[name = tensor("op_9779_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9779_end_mask_0 = const()[name = tensor("op_9779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9779_cast_fp16 = slice_by_index(begin = var_9779_begin_0, end = var_9779_end_0, end_mask = var_9779_end_mask_0, x = var_9715_cast_fp16)[name = tensor("op_9779_cast_fp16")]; tensor var_9780_begin_0 = const()[name = tensor("op_9780_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9780_end_0 = const()[name = tensor("op_9780_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9780_end_mask_0 = const()[name = tensor("op_9780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9780_cast_fp16 = slice_by_index(begin = var_9780_begin_0, end = var_9780_end_0, end_mask = var_9780_end_mask_0, x = var_9715_cast_fp16)[name = tensor("op_9780_cast_fp16")]; tensor var_9781_begin_0 = const()[name = tensor("op_9781_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9781_end_0 = const()[name = tensor("op_9781_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9781_end_mask_0 = const()[name = tensor("op_9781_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9781_cast_fp16 = slice_by_index(begin = var_9781_begin_0, end = var_9781_end_0, end_mask = var_9781_end_mask_0, x = var_9715_cast_fp16)[name = tensor("op_9781_cast_fp16")]; tensor var_9782_begin_0 = const()[name = tensor("op_9782_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9782_end_0 = const()[name = tensor("op_9782_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9782_end_mask_0 = const()[name = tensor("op_9782_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9782_cast_fp16 = slice_by_index(begin = var_9782_begin_0, end = var_9782_end_0, end_mask = var_9782_end_mask_0, x = var_9719_cast_fp16)[name = tensor("op_9782_cast_fp16")]; tensor var_9783_begin_0 = const()[name = tensor("op_9783_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9783_end_0 = const()[name = tensor("op_9783_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9783_end_mask_0 = const()[name = tensor("op_9783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9783_cast_fp16 = slice_by_index(begin = var_9783_begin_0, end = var_9783_end_0, end_mask = var_9783_end_mask_0, x = var_9719_cast_fp16)[name = tensor("op_9783_cast_fp16")]; tensor var_9784_begin_0 = const()[name = tensor("op_9784_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9784_end_0 = const()[name = tensor("op_9784_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9784_end_mask_0 = const()[name = tensor("op_9784_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9784_cast_fp16 = slice_by_index(begin = var_9784_begin_0, end = var_9784_end_0, end_mask = var_9784_end_mask_0, x = var_9719_cast_fp16)[name = tensor("op_9784_cast_fp16")]; tensor var_9785_begin_0 = const()[name = tensor("op_9785_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9785_end_0 = const()[name = tensor("op_9785_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9785_end_mask_0 = const()[name = tensor("op_9785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9785_cast_fp16 = slice_by_index(begin = var_9785_begin_0, end = var_9785_end_0, end_mask = var_9785_end_mask_0, x = var_9719_cast_fp16)[name = tensor("op_9785_cast_fp16")]; tensor var_9786_begin_0 = const()[name = tensor("op_9786_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9786_end_0 = const()[name = tensor("op_9786_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9786_end_mask_0 = const()[name = tensor("op_9786_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9786_cast_fp16 = slice_by_index(begin = var_9786_begin_0, end = var_9786_end_0, end_mask = var_9786_end_mask_0, x = var_9719_cast_fp16)[name = tensor("op_9786_cast_fp16")]; tensor var_9787_begin_0 = const()[name = tensor("op_9787_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9787_end_0 = const()[name = tensor("op_9787_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9787_end_mask_0 = const()[name = tensor("op_9787_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9787_cast_fp16 = slice_by_index(begin = var_9787_begin_0, end = var_9787_end_0, end_mask = var_9787_end_mask_0, x = var_9719_cast_fp16)[name = tensor("op_9787_cast_fp16")]; tensor var_9788_begin_0 = const()[name = tensor("op_9788_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9788_end_0 = const()[name = tensor("op_9788_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9788_end_mask_0 = const()[name = tensor("op_9788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9788_cast_fp16 = slice_by_index(begin = var_9788_begin_0, end = var_9788_end_0, end_mask = var_9788_end_mask_0, x = var_9723_cast_fp16)[name = tensor("op_9788_cast_fp16")]; tensor var_9789_begin_0 = const()[name = tensor("op_9789_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9789_end_0 = const()[name = tensor("op_9789_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9789_end_mask_0 = const()[name = tensor("op_9789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9789_cast_fp16 = slice_by_index(begin = var_9789_begin_0, end = var_9789_end_0, end_mask = var_9789_end_mask_0, x = var_9723_cast_fp16)[name = tensor("op_9789_cast_fp16")]; tensor var_9790_begin_0 = const()[name = tensor("op_9790_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9790_end_0 = const()[name = tensor("op_9790_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9790_end_mask_0 = const()[name = tensor("op_9790_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9790_cast_fp16 = slice_by_index(begin = var_9790_begin_0, end = var_9790_end_0, end_mask = var_9790_end_mask_0, x = var_9723_cast_fp16)[name = tensor("op_9790_cast_fp16")]; tensor var_9791_begin_0 = const()[name = tensor("op_9791_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9791_end_0 = const()[name = tensor("op_9791_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9791_end_mask_0 = const()[name = tensor("op_9791_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9791_cast_fp16 = slice_by_index(begin = var_9791_begin_0, end = var_9791_end_0, end_mask = var_9791_end_mask_0, x = var_9723_cast_fp16)[name = tensor("op_9791_cast_fp16")]; tensor var_9792_begin_0 = const()[name = tensor("op_9792_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9792_end_0 = const()[name = tensor("op_9792_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9792_end_mask_0 = const()[name = tensor("op_9792_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9792_cast_fp16 = slice_by_index(begin = var_9792_begin_0, end = var_9792_end_0, end_mask = var_9792_end_mask_0, x = var_9723_cast_fp16)[name = tensor("op_9792_cast_fp16")]; tensor var_9793_begin_0 = const()[name = tensor("op_9793_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9793_end_0 = const()[name = tensor("op_9793_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9793_end_mask_0 = const()[name = tensor("op_9793_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9793_cast_fp16 = slice_by_index(begin = var_9793_begin_0, end = var_9793_end_0, end_mask = var_9793_end_mask_0, x = var_9723_cast_fp16)[name = tensor("op_9793_cast_fp16")]; tensor var_9794_begin_0 = const()[name = tensor("op_9794_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9794_end_0 = const()[name = tensor("op_9794_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9794_end_mask_0 = const()[name = tensor("op_9794_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9794_cast_fp16 = slice_by_index(begin = var_9794_begin_0, end = var_9794_end_0, end_mask = var_9794_end_mask_0, x = var_9727_cast_fp16)[name = tensor("op_9794_cast_fp16")]; tensor var_9795_begin_0 = const()[name = tensor("op_9795_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9795_end_0 = const()[name = tensor("op_9795_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9795_end_mask_0 = const()[name = tensor("op_9795_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9795_cast_fp16 = slice_by_index(begin = var_9795_begin_0, end = var_9795_end_0, end_mask = var_9795_end_mask_0, x = var_9727_cast_fp16)[name = tensor("op_9795_cast_fp16")]; tensor var_9796_begin_0 = const()[name = tensor("op_9796_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9796_end_0 = const()[name = tensor("op_9796_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9796_end_mask_0 = const()[name = tensor("op_9796_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9796_cast_fp16 = slice_by_index(begin = var_9796_begin_0, end = var_9796_end_0, end_mask = var_9796_end_mask_0, x = var_9727_cast_fp16)[name = tensor("op_9796_cast_fp16")]; tensor var_9797_begin_0 = const()[name = tensor("op_9797_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9797_end_0 = const()[name = tensor("op_9797_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9797_end_mask_0 = const()[name = tensor("op_9797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9797_cast_fp16 = slice_by_index(begin = var_9797_begin_0, end = var_9797_end_0, end_mask = var_9797_end_mask_0, x = var_9727_cast_fp16)[name = tensor("op_9797_cast_fp16")]; tensor var_9798_begin_0 = const()[name = tensor("op_9798_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9798_end_0 = const()[name = tensor("op_9798_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9798_end_mask_0 = const()[name = tensor("op_9798_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9798_cast_fp16 = slice_by_index(begin = var_9798_begin_0, end = var_9798_end_0, end_mask = var_9798_end_mask_0, x = var_9727_cast_fp16)[name = tensor("op_9798_cast_fp16")]; tensor var_9799_begin_0 = const()[name = tensor("op_9799_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9799_end_0 = const()[name = tensor("op_9799_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9799_end_mask_0 = const()[name = tensor("op_9799_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9799_cast_fp16 = slice_by_index(begin = var_9799_begin_0, end = var_9799_end_0, end_mask = var_9799_end_mask_0, x = var_9727_cast_fp16)[name = tensor("op_9799_cast_fp16")]; tensor var_9800_begin_0 = const()[name = tensor("op_9800_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9800_end_0 = const()[name = tensor("op_9800_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_9800_end_mask_0 = const()[name = tensor("op_9800_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9800_cast_fp16 = slice_by_index(begin = var_9800_begin_0, end = var_9800_end_0, end_mask = var_9800_end_mask_0, x = var_9731_cast_fp16)[name = tensor("op_9800_cast_fp16")]; tensor var_9801_begin_0 = const()[name = tensor("op_9801_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9801_end_0 = const()[name = tensor("op_9801_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_9801_end_mask_0 = const()[name = tensor("op_9801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9801_cast_fp16 = slice_by_index(begin = var_9801_begin_0, end = var_9801_end_0, end_mask = var_9801_end_mask_0, x = var_9731_cast_fp16)[name = tensor("op_9801_cast_fp16")]; tensor var_9802_begin_0 = const()[name = tensor("op_9802_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9802_end_0 = const()[name = tensor("op_9802_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_9802_end_mask_0 = const()[name = tensor("op_9802_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9802_cast_fp16 = slice_by_index(begin = var_9802_begin_0, end = var_9802_end_0, end_mask = var_9802_end_mask_0, x = var_9731_cast_fp16)[name = tensor("op_9802_cast_fp16")]; tensor var_9803_begin_0 = const()[name = tensor("op_9803_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_9803_end_0 = const()[name = tensor("op_9803_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_9803_end_mask_0 = const()[name = tensor("op_9803_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9803_cast_fp16 = slice_by_index(begin = var_9803_begin_0, end = var_9803_end_0, end_mask = var_9803_end_mask_0, x = var_9731_cast_fp16)[name = tensor("op_9803_cast_fp16")]; tensor var_9804_begin_0 = const()[name = tensor("op_9804_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_9804_end_0 = const()[name = tensor("op_9804_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_9804_end_mask_0 = const()[name = tensor("op_9804_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9804_cast_fp16 = slice_by_index(begin = var_9804_begin_0, end = var_9804_end_0, end_mask = var_9804_end_mask_0, x = var_9731_cast_fp16)[name = tensor("op_9804_cast_fp16")]; tensor var_9805_begin_0 = const()[name = tensor("op_9805_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_9805_end_0 = const()[name = tensor("op_9805_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_9805_end_mask_0 = const()[name = tensor("op_9805_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9805_cast_fp16 = slice_by_index(begin = var_9805_begin_0, end = var_9805_end_0, end_mask = var_9805_end_mask_0, x = var_9731_cast_fp16)[name = tensor("op_9805_cast_fp16")]; tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_9810_begin_0 = const()[name = tensor("op_9810_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9810_end_0 = const()[name = tensor("op_9810_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_9810_end_mask_0 = const()[name = tensor("op_9810_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; tensor var_9810_cast_fp16 = slice_by_index(begin = var_9810_begin_0, end = var_9810_end_0, end_mask = var_9810_end_mask_0, x = k_cast_fp16)[name = tensor("op_9810_cast_fp16")]; tensor var_9814_begin_0 = const()[name = tensor("op_9814_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_9814_end_0 = const()[name = tensor("op_9814_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_9814_end_mask_0 = const()[name = tensor("op_9814_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9814_cast_fp16 = slice_by_index(begin = var_9814_begin_0, end = var_9814_end_0, end_mask = var_9814_end_mask_0, x = k_cast_fp16)[name = tensor("op_9814_cast_fp16")]; tensor var_9818_begin_0 = const()[name = tensor("op_9818_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_9818_end_0 = const()[name = tensor("op_9818_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_9818_end_mask_0 = const()[name = tensor("op_9818_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9818_cast_fp16 = slice_by_index(begin = var_9818_begin_0, end = var_9818_end_0, end_mask = var_9818_end_mask_0, x = k_cast_fp16)[name = tensor("op_9818_cast_fp16")]; tensor var_9822_begin_0 = const()[name = tensor("op_9822_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_9822_end_0 = const()[name = tensor("op_9822_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_9822_end_mask_0 = const()[name = tensor("op_9822_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9822_cast_fp16 = slice_by_index(begin = var_9822_begin_0, end = var_9822_end_0, end_mask = var_9822_end_mask_0, x = k_cast_fp16)[name = tensor("op_9822_cast_fp16")]; tensor var_9826_begin_0 = const()[name = tensor("op_9826_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_9826_end_0 = const()[name = tensor("op_9826_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_9826_end_mask_0 = const()[name = tensor("op_9826_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9826_cast_fp16 = slice_by_index(begin = var_9826_begin_0, end = var_9826_end_0, end_mask = var_9826_end_mask_0, x = k_cast_fp16)[name = tensor("op_9826_cast_fp16")]; tensor var_9830_begin_0 = const()[name = tensor("op_9830_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_9830_end_0 = const()[name = tensor("op_9830_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_9830_end_mask_0 = const()[name = tensor("op_9830_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9830_cast_fp16 = slice_by_index(begin = var_9830_begin_0, end = var_9830_end_0, end_mask = var_9830_end_mask_0, x = k_cast_fp16)[name = tensor("op_9830_cast_fp16")]; tensor var_9834_begin_0 = const()[name = tensor("op_9834_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_9834_end_0 = const()[name = tensor("op_9834_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_9834_end_mask_0 = const()[name = tensor("op_9834_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9834_cast_fp16 = slice_by_index(begin = var_9834_begin_0, end = var_9834_end_0, end_mask = var_9834_end_mask_0, x = k_cast_fp16)[name = tensor("op_9834_cast_fp16")]; tensor var_9838_begin_0 = const()[name = tensor("op_9838_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_9838_end_0 = const()[name = tensor("op_9838_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_9838_end_mask_0 = const()[name = tensor("op_9838_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9838_cast_fp16 = slice_by_index(begin = var_9838_begin_0, end = var_9838_end_0, end_mask = var_9838_end_mask_0, x = k_cast_fp16)[name = tensor("op_9838_cast_fp16")]; tensor var_9842_begin_0 = const()[name = tensor("op_9842_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9842_end_0 = const()[name = tensor("op_9842_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_9842_end_mask_0 = const()[name = tensor("op_9842_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9842_cast_fp16 = slice_by_index(begin = var_9842_begin_0, end = var_9842_end_0, end_mask = var_9842_end_mask_0, x = k_cast_fp16)[name = tensor("op_9842_cast_fp16")]; tensor var_9846_begin_0 = const()[name = tensor("op_9846_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_9846_end_0 = const()[name = tensor("op_9846_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_9846_end_mask_0 = const()[name = tensor("op_9846_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9846_cast_fp16 = slice_by_index(begin = var_9846_begin_0, end = var_9846_end_0, end_mask = var_9846_end_mask_0, x = k_cast_fp16)[name = tensor("op_9846_cast_fp16")]; tensor var_9850_begin_0 = const()[name = tensor("op_9850_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_9850_end_0 = const()[name = tensor("op_9850_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_9850_end_mask_0 = const()[name = tensor("op_9850_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9850_cast_fp16 = slice_by_index(begin = var_9850_begin_0, end = var_9850_end_0, end_mask = var_9850_end_mask_0, x = k_cast_fp16)[name = tensor("op_9850_cast_fp16")]; tensor var_9854_begin_0 = const()[name = tensor("op_9854_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_9854_end_0 = const()[name = tensor("op_9854_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_9854_end_mask_0 = const()[name = tensor("op_9854_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9854_cast_fp16 = slice_by_index(begin = var_9854_begin_0, end = var_9854_end_0, end_mask = var_9854_end_mask_0, x = k_cast_fp16)[name = tensor("op_9854_cast_fp16")]; tensor var_9856_begin_0 = const()[name = tensor("op_9856_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9856_end_0 = const()[name = tensor("op_9856_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_9856_end_mask_0 = const()[name = tensor("op_9856_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9856_cast_fp16 = slice_by_index(begin = var_9856_begin_0, end = var_9856_end_0, end_mask = var_9856_end_mask_0, x = value_cast_fp16)[name = tensor("op_9856_cast_fp16")]; tensor var_9860_begin_0 = const()[name = tensor("op_9860_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_9860_end_0 = const()[name = tensor("op_9860_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_9860_end_mask_0 = const()[name = tensor("op_9860_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9860_cast_fp16 = slice_by_index(begin = var_9860_begin_0, end = var_9860_end_0, end_mask = var_9860_end_mask_0, x = value_cast_fp16)[name = tensor("op_9860_cast_fp16")]; tensor var_9864_begin_0 = const()[name = tensor("op_9864_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_9864_end_0 = const()[name = tensor("op_9864_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_9864_end_mask_0 = const()[name = tensor("op_9864_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9864_cast_fp16 = slice_by_index(begin = var_9864_begin_0, end = var_9864_end_0, end_mask = var_9864_end_mask_0, x = value_cast_fp16)[name = tensor("op_9864_cast_fp16")]; tensor var_9868_begin_0 = const()[name = tensor("op_9868_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_9868_end_0 = const()[name = tensor("op_9868_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_9868_end_mask_0 = const()[name = tensor("op_9868_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9868_cast_fp16 = slice_by_index(begin = var_9868_begin_0, end = var_9868_end_0, end_mask = var_9868_end_mask_0, x = value_cast_fp16)[name = tensor("op_9868_cast_fp16")]; tensor var_9872_begin_0 = const()[name = tensor("op_9872_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_9872_end_0 = const()[name = tensor("op_9872_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_9872_end_mask_0 = const()[name = tensor("op_9872_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9872_cast_fp16 = slice_by_index(begin = var_9872_begin_0, end = var_9872_end_0, end_mask = var_9872_end_mask_0, x = value_cast_fp16)[name = tensor("op_9872_cast_fp16")]; tensor var_9876_begin_0 = const()[name = tensor("op_9876_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9876_end_0 = const()[name = tensor("op_9876_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_9876_end_mask_0 = const()[name = tensor("op_9876_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9876_cast_fp16 = slice_by_index(begin = var_9876_begin_0, end = var_9876_end_0, end_mask = var_9876_end_mask_0, x = value_cast_fp16)[name = tensor("op_9876_cast_fp16")]; tensor var_9880_begin_0 = const()[name = tensor("op_9880_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_9880_end_0 = const()[name = tensor("op_9880_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_9880_end_mask_0 = const()[name = tensor("op_9880_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9880_cast_fp16 = slice_by_index(begin = var_9880_begin_0, end = var_9880_end_0, end_mask = var_9880_end_mask_0, x = value_cast_fp16)[name = tensor("op_9880_cast_fp16")]; tensor var_9884_begin_0 = const()[name = tensor("op_9884_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_9884_end_0 = const()[name = tensor("op_9884_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_9884_end_mask_0 = const()[name = tensor("op_9884_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9884_cast_fp16 = slice_by_index(begin = var_9884_begin_0, end = var_9884_end_0, end_mask = var_9884_end_mask_0, x = value_cast_fp16)[name = tensor("op_9884_cast_fp16")]; tensor var_9888_begin_0 = const()[name = tensor("op_9888_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_9888_end_0 = const()[name = tensor("op_9888_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_9888_end_mask_0 = const()[name = tensor("op_9888_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9888_cast_fp16 = slice_by_index(begin = var_9888_begin_0, end = var_9888_end_0, end_mask = var_9888_end_mask_0, x = value_cast_fp16)[name = tensor("op_9888_cast_fp16")]; tensor var_9892_begin_0 = const()[name = tensor("op_9892_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_9892_end_0 = const()[name = tensor("op_9892_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_9892_end_mask_0 = const()[name = tensor("op_9892_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9892_cast_fp16 = slice_by_index(begin = var_9892_begin_0, end = var_9892_end_0, end_mask = var_9892_end_mask_0, x = value_cast_fp16)[name = tensor("op_9892_cast_fp16")]; tensor var_9896_begin_0 = const()[name = tensor("op_9896_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_9896_end_0 = const()[name = tensor("op_9896_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_9896_end_mask_0 = const()[name = tensor("op_9896_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9896_cast_fp16 = slice_by_index(begin = var_9896_begin_0, end = var_9896_end_0, end_mask = var_9896_end_mask_0, x = value_cast_fp16)[name = tensor("op_9896_cast_fp16")]; tensor var_9900_begin_0 = const()[name = tensor("op_9900_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_9900_end_0 = const()[name = tensor("op_9900_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_9900_end_mask_0 = const()[name = tensor("op_9900_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_9900_cast_fp16 = slice_by_index(begin = var_9900_begin_0, end = var_9900_end_0, end_mask = var_9900_end_mask_0, x = value_cast_fp16)[name = tensor("op_9900_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1585_equation_0, values = (var_9810_cast_fp16, var_9734_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1587_equation_0, values = (var_9810_cast_fp16, var_9735_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1589_equation_0, values = (var_9810_cast_fp16, var_9736_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1591_equation_0, values = (var_9810_cast_fp16, var_9737_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1593_equation_0, values = (var_9810_cast_fp16, var_9738_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1595_equation_0, values = (var_9810_cast_fp16, var_9739_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1597_equation_0, values = (var_9814_cast_fp16, var_9740_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1599_equation_0, values = (var_9814_cast_fp16, var_9741_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1599_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1601_equation_0, values = (var_9814_cast_fp16, var_9742_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1603_equation_0, values = (var_9814_cast_fp16, var_9743_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1605_equation_0, values = (var_9814_cast_fp16, var_9744_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1607_equation_0, values = (var_9814_cast_fp16, var_9745_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1609_equation_0, values = (var_9818_cast_fp16, var_9746_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1611_equation_0, values = (var_9818_cast_fp16, var_9747_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1613_equation_0, values = (var_9818_cast_fp16, var_9748_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1615_equation_0, values = (var_9818_cast_fp16, var_9749_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1617_equation_0, values = (var_9818_cast_fp16, var_9750_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1619_equation_0, values = (var_9818_cast_fp16, var_9751_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1621_equation_0, values = (var_9822_cast_fp16, var_9752_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1623_equation_0, values = (var_9822_cast_fp16, var_9753_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1625_equation_0, values = (var_9822_cast_fp16, var_9754_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1627_equation_0, values = (var_9822_cast_fp16, var_9755_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1629_equation_0, values = (var_9822_cast_fp16, var_9756_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1631_equation_0, values = (var_9822_cast_fp16, var_9757_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1633_equation_0, values = (var_9826_cast_fp16, var_9758_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1635_equation_0, values = (var_9826_cast_fp16, var_9759_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1637_equation_0, values = (var_9826_cast_fp16, var_9760_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1639_equation_0, values = (var_9826_cast_fp16, var_9761_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1639_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1641_equation_0, values = (var_9826_cast_fp16, var_9762_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1643_equation_0, values = (var_9826_cast_fp16, var_9763_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1645_equation_0, values = (var_9830_cast_fp16, var_9764_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1647_equation_0, values = (var_9830_cast_fp16, var_9765_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1649_equation_0, values = (var_9830_cast_fp16, var_9766_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1651_equation_0, values = (var_9830_cast_fp16, var_9767_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1653_equation_0, values = (var_9830_cast_fp16, var_9768_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1655_equation_0, values = (var_9830_cast_fp16, var_9769_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1657_equation_0, values = (var_9834_cast_fp16, var_9770_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1659_equation_0, values = (var_9834_cast_fp16, var_9771_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1661_equation_0, values = (var_9834_cast_fp16, var_9772_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1663_equation_0, values = (var_9834_cast_fp16, var_9773_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1665_equation_0, values = (var_9834_cast_fp16, var_9774_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1667_equation_0, values = (var_9834_cast_fp16, var_9775_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1669_equation_0, values = (var_9838_cast_fp16, var_9776_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1671_equation_0, values = (var_9838_cast_fp16, var_9777_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1673_equation_0, values = (var_9838_cast_fp16, var_9778_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1675_equation_0, values = (var_9838_cast_fp16, var_9779_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1677_equation_0, values = (var_9838_cast_fp16, var_9780_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1679_equation_0, values = (var_9838_cast_fp16, var_9781_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1679_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1681_equation_0, values = (var_9842_cast_fp16, var_9782_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1683_equation_0, values = (var_9842_cast_fp16, var_9783_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1685_equation_0, values = (var_9842_cast_fp16, var_9784_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1687_equation_0, values = (var_9842_cast_fp16, var_9785_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1689_equation_0, values = (var_9842_cast_fp16, var_9786_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1691_equation_0, values = (var_9842_cast_fp16, var_9787_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1693_equation_0, values = (var_9846_cast_fp16, var_9788_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1695_equation_0, values = (var_9846_cast_fp16, var_9789_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1697_equation_0, values = (var_9846_cast_fp16, var_9790_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1699_equation_0, values = (var_9846_cast_fp16, var_9791_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1701_equation_0, values = (var_9846_cast_fp16, var_9792_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1703_equation_0, values = (var_9846_cast_fp16, var_9793_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1705_equation_0, values = (var_9850_cast_fp16, var_9794_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1707_equation_0, values = (var_9850_cast_fp16, var_9795_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1709_equation_0, values = (var_9850_cast_fp16, var_9796_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1711_equation_0, values = (var_9850_cast_fp16, var_9797_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1713_equation_0, values = (var_9850_cast_fp16, var_9798_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1715_equation_0, values = (var_9850_cast_fp16, var_9799_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1717_equation_0, values = (var_9854_cast_fp16, var_9800_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1719_equation_0, values = (var_9854_cast_fp16, var_9801_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1719_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1721_equation_0, values = (var_9854_cast_fp16, var_9802_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1723_equation_0, values = (var_9854_cast_fp16, var_9803_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1725_equation_0, values = (var_9854_cast_fp16, var_9804_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_equation_0, values = (var_9854_cast_fp16, var_9805_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_cast_fp16")]; tensor var_10047_to_fp16 = const()[name = tensor("op_10047_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1585_cast_fp16, y = var_10047_to_fp16)[name = tensor("aw_chunk_1585_cast_fp16")]; tensor var_10049_to_fp16 = const()[name = tensor("op_10049_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1587_cast_fp16, y = var_10049_to_fp16)[name = tensor("aw_chunk_1587_cast_fp16")]; tensor var_10051_to_fp16 = const()[name = tensor("op_10051_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1589_cast_fp16, y = var_10051_to_fp16)[name = tensor("aw_chunk_1589_cast_fp16")]; tensor var_10053_to_fp16 = const()[name = tensor("op_10053_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1591_cast_fp16, y = var_10053_to_fp16)[name = tensor("aw_chunk_1591_cast_fp16")]; tensor var_10055_to_fp16 = const()[name = tensor("op_10055_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1593_cast_fp16, y = var_10055_to_fp16)[name = tensor("aw_chunk_1593_cast_fp16")]; tensor var_10057_to_fp16 = const()[name = tensor("op_10057_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1595_cast_fp16, y = var_10057_to_fp16)[name = tensor("aw_chunk_1595_cast_fp16")]; tensor var_10059_to_fp16 = const()[name = tensor("op_10059_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1597_cast_fp16, y = var_10059_to_fp16)[name = tensor("aw_chunk_1597_cast_fp16")]; tensor var_10061_to_fp16 = const()[name = tensor("op_10061_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1599_cast_fp16, y = var_10061_to_fp16)[name = tensor("aw_chunk_1599_cast_fp16")]; tensor var_10063_to_fp16 = const()[name = tensor("op_10063_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1601_cast_fp16, y = var_10063_to_fp16)[name = tensor("aw_chunk_1601_cast_fp16")]; tensor var_10065_to_fp16 = const()[name = tensor("op_10065_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1603_cast_fp16, y = var_10065_to_fp16)[name = tensor("aw_chunk_1603_cast_fp16")]; tensor var_10067_to_fp16 = const()[name = tensor("op_10067_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1605_cast_fp16, y = var_10067_to_fp16)[name = tensor("aw_chunk_1605_cast_fp16")]; tensor var_10069_to_fp16 = const()[name = tensor("op_10069_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1607_cast_fp16, y = var_10069_to_fp16)[name = tensor("aw_chunk_1607_cast_fp16")]; tensor var_10071_to_fp16 = const()[name = tensor("op_10071_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1609_cast_fp16, y = var_10071_to_fp16)[name = tensor("aw_chunk_1609_cast_fp16")]; tensor var_10073_to_fp16 = const()[name = tensor("op_10073_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1611_cast_fp16, y = var_10073_to_fp16)[name = tensor("aw_chunk_1611_cast_fp16")]; tensor var_10075_to_fp16 = const()[name = tensor("op_10075_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1613_cast_fp16, y = var_10075_to_fp16)[name = tensor("aw_chunk_1613_cast_fp16")]; tensor var_10077_to_fp16 = const()[name = tensor("op_10077_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1615_cast_fp16, y = var_10077_to_fp16)[name = tensor("aw_chunk_1615_cast_fp16")]; tensor var_10079_to_fp16 = const()[name = tensor("op_10079_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1617_cast_fp16, y = var_10079_to_fp16)[name = tensor("aw_chunk_1617_cast_fp16")]; tensor var_10081_to_fp16 = const()[name = tensor("op_10081_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1619_cast_fp16, y = var_10081_to_fp16)[name = tensor("aw_chunk_1619_cast_fp16")]; tensor var_10083_to_fp16 = const()[name = tensor("op_10083_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1621_cast_fp16, y = var_10083_to_fp16)[name = tensor("aw_chunk_1621_cast_fp16")]; tensor var_10085_to_fp16 = const()[name = tensor("op_10085_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1623_cast_fp16, y = var_10085_to_fp16)[name = tensor("aw_chunk_1623_cast_fp16")]; tensor var_10087_to_fp16 = const()[name = tensor("op_10087_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1625_cast_fp16, y = var_10087_to_fp16)[name = tensor("aw_chunk_1625_cast_fp16")]; tensor var_10089_to_fp16 = const()[name = tensor("op_10089_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1627_cast_fp16, y = var_10089_to_fp16)[name = tensor("aw_chunk_1627_cast_fp16")]; tensor var_10091_to_fp16 = const()[name = tensor("op_10091_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1629_cast_fp16, y = var_10091_to_fp16)[name = tensor("aw_chunk_1629_cast_fp16")]; tensor var_10093_to_fp16 = const()[name = tensor("op_10093_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1631_cast_fp16, y = var_10093_to_fp16)[name = tensor("aw_chunk_1631_cast_fp16")]; tensor var_10095_to_fp16 = const()[name = tensor("op_10095_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1633_cast_fp16, y = var_10095_to_fp16)[name = tensor("aw_chunk_1633_cast_fp16")]; tensor var_10097_to_fp16 = const()[name = tensor("op_10097_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1635_cast_fp16, y = var_10097_to_fp16)[name = tensor("aw_chunk_1635_cast_fp16")]; tensor var_10099_to_fp16 = const()[name = tensor("op_10099_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1637_cast_fp16, y = var_10099_to_fp16)[name = tensor("aw_chunk_1637_cast_fp16")]; tensor var_10101_to_fp16 = const()[name = tensor("op_10101_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1639_cast_fp16, y = var_10101_to_fp16)[name = tensor("aw_chunk_1639_cast_fp16")]; tensor var_10103_to_fp16 = const()[name = tensor("op_10103_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1641_cast_fp16, y = var_10103_to_fp16)[name = tensor("aw_chunk_1641_cast_fp16")]; tensor var_10105_to_fp16 = const()[name = tensor("op_10105_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1643_cast_fp16, y = var_10105_to_fp16)[name = tensor("aw_chunk_1643_cast_fp16")]; tensor var_10107_to_fp16 = const()[name = tensor("op_10107_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1645_cast_fp16, y = var_10107_to_fp16)[name = tensor("aw_chunk_1645_cast_fp16")]; tensor var_10109_to_fp16 = const()[name = tensor("op_10109_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1647_cast_fp16, y = var_10109_to_fp16)[name = tensor("aw_chunk_1647_cast_fp16")]; tensor var_10111_to_fp16 = const()[name = tensor("op_10111_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1649_cast_fp16, y = var_10111_to_fp16)[name = tensor("aw_chunk_1649_cast_fp16")]; tensor var_10113_to_fp16 = const()[name = tensor("op_10113_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1651_cast_fp16, y = var_10113_to_fp16)[name = tensor("aw_chunk_1651_cast_fp16")]; tensor var_10115_to_fp16 = const()[name = tensor("op_10115_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1653_cast_fp16, y = var_10115_to_fp16)[name = tensor("aw_chunk_1653_cast_fp16")]; tensor var_10117_to_fp16 = const()[name = tensor("op_10117_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1655_cast_fp16, y = var_10117_to_fp16)[name = tensor("aw_chunk_1655_cast_fp16")]; tensor var_10119_to_fp16 = const()[name = tensor("op_10119_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1657_cast_fp16, y = var_10119_to_fp16)[name = tensor("aw_chunk_1657_cast_fp16")]; tensor var_10121_to_fp16 = const()[name = tensor("op_10121_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1659_cast_fp16, y = var_10121_to_fp16)[name = tensor("aw_chunk_1659_cast_fp16")]; tensor var_10123_to_fp16 = const()[name = tensor("op_10123_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1661_cast_fp16, y = var_10123_to_fp16)[name = tensor("aw_chunk_1661_cast_fp16")]; tensor var_10125_to_fp16 = const()[name = tensor("op_10125_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1663_cast_fp16, y = var_10125_to_fp16)[name = tensor("aw_chunk_1663_cast_fp16")]; tensor var_10127_to_fp16 = const()[name = tensor("op_10127_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1665_cast_fp16, y = var_10127_to_fp16)[name = tensor("aw_chunk_1665_cast_fp16")]; tensor var_10129_to_fp16 = const()[name = tensor("op_10129_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1667_cast_fp16, y = var_10129_to_fp16)[name = tensor("aw_chunk_1667_cast_fp16")]; tensor var_10131_to_fp16 = const()[name = tensor("op_10131_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1669_cast_fp16, y = var_10131_to_fp16)[name = tensor("aw_chunk_1669_cast_fp16")]; tensor var_10133_to_fp16 = const()[name = tensor("op_10133_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1671_cast_fp16, y = var_10133_to_fp16)[name = tensor("aw_chunk_1671_cast_fp16")]; tensor var_10135_to_fp16 = const()[name = tensor("op_10135_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1673_cast_fp16, y = var_10135_to_fp16)[name = tensor("aw_chunk_1673_cast_fp16")]; tensor var_10137_to_fp16 = const()[name = tensor("op_10137_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1675_cast_fp16, y = var_10137_to_fp16)[name = tensor("aw_chunk_1675_cast_fp16")]; tensor var_10139_to_fp16 = const()[name = tensor("op_10139_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1677_cast_fp16, y = var_10139_to_fp16)[name = tensor("aw_chunk_1677_cast_fp16")]; tensor var_10141_to_fp16 = const()[name = tensor("op_10141_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1679_cast_fp16, y = var_10141_to_fp16)[name = tensor("aw_chunk_1679_cast_fp16")]; tensor var_10143_to_fp16 = const()[name = tensor("op_10143_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1681_cast_fp16, y = var_10143_to_fp16)[name = tensor("aw_chunk_1681_cast_fp16")]; tensor var_10145_to_fp16 = const()[name = tensor("op_10145_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1683_cast_fp16, y = var_10145_to_fp16)[name = tensor("aw_chunk_1683_cast_fp16")]; tensor var_10147_to_fp16 = const()[name = tensor("op_10147_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1685_cast_fp16, y = var_10147_to_fp16)[name = tensor("aw_chunk_1685_cast_fp16")]; tensor var_10149_to_fp16 = const()[name = tensor("op_10149_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1687_cast_fp16, y = var_10149_to_fp16)[name = tensor("aw_chunk_1687_cast_fp16")]; tensor var_10151_to_fp16 = const()[name = tensor("op_10151_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1689_cast_fp16, y = var_10151_to_fp16)[name = tensor("aw_chunk_1689_cast_fp16")]; tensor var_10153_to_fp16 = const()[name = tensor("op_10153_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1691_cast_fp16, y = var_10153_to_fp16)[name = tensor("aw_chunk_1691_cast_fp16")]; tensor var_10155_to_fp16 = const()[name = tensor("op_10155_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1693_cast_fp16, y = var_10155_to_fp16)[name = tensor("aw_chunk_1693_cast_fp16")]; tensor var_10157_to_fp16 = const()[name = tensor("op_10157_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1695_cast_fp16, y = var_10157_to_fp16)[name = tensor("aw_chunk_1695_cast_fp16")]; tensor var_10159_to_fp16 = const()[name = tensor("op_10159_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1697_cast_fp16, y = var_10159_to_fp16)[name = tensor("aw_chunk_1697_cast_fp16")]; tensor var_10161_to_fp16 = const()[name = tensor("op_10161_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1699_cast_fp16, y = var_10161_to_fp16)[name = tensor("aw_chunk_1699_cast_fp16")]; tensor var_10163_to_fp16 = const()[name = tensor("op_10163_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1701_cast_fp16, y = var_10163_to_fp16)[name = tensor("aw_chunk_1701_cast_fp16")]; tensor var_10165_to_fp16 = const()[name = tensor("op_10165_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1703_cast_fp16, y = var_10165_to_fp16)[name = tensor("aw_chunk_1703_cast_fp16")]; tensor var_10167_to_fp16 = const()[name = tensor("op_10167_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1705_cast_fp16, y = var_10167_to_fp16)[name = tensor("aw_chunk_1705_cast_fp16")]; tensor var_10169_to_fp16 = const()[name = tensor("op_10169_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1707_cast_fp16, y = var_10169_to_fp16)[name = tensor("aw_chunk_1707_cast_fp16")]; tensor var_10171_to_fp16 = const()[name = tensor("op_10171_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1709_cast_fp16, y = var_10171_to_fp16)[name = tensor("aw_chunk_1709_cast_fp16")]; tensor var_10173_to_fp16 = const()[name = tensor("op_10173_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1711_cast_fp16, y = var_10173_to_fp16)[name = tensor("aw_chunk_1711_cast_fp16")]; tensor var_10175_to_fp16 = const()[name = tensor("op_10175_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1713_cast_fp16, y = var_10175_to_fp16)[name = tensor("aw_chunk_1713_cast_fp16")]; tensor var_10177_to_fp16 = const()[name = tensor("op_10177_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1715_cast_fp16, y = var_10177_to_fp16)[name = tensor("aw_chunk_1715_cast_fp16")]; tensor var_10179_to_fp16 = const()[name = tensor("op_10179_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1717_cast_fp16, y = var_10179_to_fp16)[name = tensor("aw_chunk_1717_cast_fp16")]; tensor var_10181_to_fp16 = const()[name = tensor("op_10181_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1719_cast_fp16, y = var_10181_to_fp16)[name = tensor("aw_chunk_1719_cast_fp16")]; tensor var_10183_to_fp16 = const()[name = tensor("op_10183_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1721_cast_fp16, y = var_10183_to_fp16)[name = tensor("aw_chunk_1721_cast_fp16")]; tensor var_10185_to_fp16 = const()[name = tensor("op_10185_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1723_cast_fp16, y = var_10185_to_fp16)[name = tensor("aw_chunk_1723_cast_fp16")]; tensor var_10187_to_fp16 = const()[name = tensor("op_10187_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1725_cast_fp16, y = var_10187_to_fp16)[name = tensor("aw_chunk_1725_cast_fp16")]; tensor var_10189_to_fp16 = const()[name = tensor("op_10189_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_cast_fp16, y = var_10189_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; tensor var_10191_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1585_cast_fp16)[name = tensor("op_10191_cast_fp16")]; tensor var_10192_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1587_cast_fp16)[name = tensor("op_10192_cast_fp16")]; tensor var_10193_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1589_cast_fp16)[name = tensor("op_10193_cast_fp16")]; tensor var_10194_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1591_cast_fp16)[name = tensor("op_10194_cast_fp16")]; tensor var_10195_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1593_cast_fp16)[name = tensor("op_10195_cast_fp16")]; tensor var_10196_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1595_cast_fp16)[name = tensor("op_10196_cast_fp16")]; tensor var_10197_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1597_cast_fp16)[name = tensor("op_10197_cast_fp16")]; tensor var_10198_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1599_cast_fp16)[name = tensor("op_10198_cast_fp16")]; tensor var_10199_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1601_cast_fp16)[name = tensor("op_10199_cast_fp16")]; tensor var_10200_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1603_cast_fp16)[name = tensor("op_10200_cast_fp16")]; tensor var_10201_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1605_cast_fp16)[name = tensor("op_10201_cast_fp16")]; tensor var_10202_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1607_cast_fp16)[name = tensor("op_10202_cast_fp16")]; tensor var_10203_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1609_cast_fp16)[name = tensor("op_10203_cast_fp16")]; tensor var_10204_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1611_cast_fp16)[name = tensor("op_10204_cast_fp16")]; tensor var_10205_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1613_cast_fp16)[name = tensor("op_10205_cast_fp16")]; tensor var_10206_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1615_cast_fp16)[name = tensor("op_10206_cast_fp16")]; tensor var_10207_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1617_cast_fp16)[name = tensor("op_10207_cast_fp16")]; tensor var_10208_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1619_cast_fp16)[name = tensor("op_10208_cast_fp16")]; tensor var_10209_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1621_cast_fp16)[name = tensor("op_10209_cast_fp16")]; tensor var_10210_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1623_cast_fp16)[name = tensor("op_10210_cast_fp16")]; tensor var_10211_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1625_cast_fp16)[name = tensor("op_10211_cast_fp16")]; tensor var_10212_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1627_cast_fp16)[name = tensor("op_10212_cast_fp16")]; tensor var_10213_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1629_cast_fp16)[name = tensor("op_10213_cast_fp16")]; tensor var_10214_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1631_cast_fp16)[name = tensor("op_10214_cast_fp16")]; tensor var_10215_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1633_cast_fp16)[name = tensor("op_10215_cast_fp16")]; tensor var_10216_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1635_cast_fp16)[name = tensor("op_10216_cast_fp16")]; tensor var_10217_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1637_cast_fp16)[name = tensor("op_10217_cast_fp16")]; tensor var_10218_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1639_cast_fp16)[name = tensor("op_10218_cast_fp16")]; tensor var_10219_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1641_cast_fp16)[name = tensor("op_10219_cast_fp16")]; tensor var_10220_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1643_cast_fp16)[name = tensor("op_10220_cast_fp16")]; tensor var_10221_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1645_cast_fp16)[name = tensor("op_10221_cast_fp16")]; tensor var_10222_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1647_cast_fp16)[name = tensor("op_10222_cast_fp16")]; tensor var_10223_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1649_cast_fp16)[name = tensor("op_10223_cast_fp16")]; tensor var_10224_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1651_cast_fp16)[name = tensor("op_10224_cast_fp16")]; tensor var_10225_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1653_cast_fp16)[name = tensor("op_10225_cast_fp16")]; tensor var_10226_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1655_cast_fp16)[name = tensor("op_10226_cast_fp16")]; tensor var_10227_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1657_cast_fp16)[name = tensor("op_10227_cast_fp16")]; tensor var_10228_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1659_cast_fp16)[name = tensor("op_10228_cast_fp16")]; tensor var_10229_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1661_cast_fp16)[name = tensor("op_10229_cast_fp16")]; tensor var_10230_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1663_cast_fp16)[name = tensor("op_10230_cast_fp16")]; tensor var_10231_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1665_cast_fp16)[name = tensor("op_10231_cast_fp16")]; tensor var_10232_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1667_cast_fp16)[name = tensor("op_10232_cast_fp16")]; tensor var_10233_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1669_cast_fp16)[name = tensor("op_10233_cast_fp16")]; tensor var_10234_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1671_cast_fp16)[name = tensor("op_10234_cast_fp16")]; tensor var_10235_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1673_cast_fp16)[name = tensor("op_10235_cast_fp16")]; tensor var_10236_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1675_cast_fp16)[name = tensor("op_10236_cast_fp16")]; tensor var_10237_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1677_cast_fp16)[name = tensor("op_10237_cast_fp16")]; tensor var_10238_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1679_cast_fp16)[name = tensor("op_10238_cast_fp16")]; tensor var_10239_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1681_cast_fp16)[name = tensor("op_10239_cast_fp16")]; tensor var_10240_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1683_cast_fp16)[name = tensor("op_10240_cast_fp16")]; tensor var_10241_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1685_cast_fp16)[name = tensor("op_10241_cast_fp16")]; tensor var_10242_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1687_cast_fp16)[name = tensor("op_10242_cast_fp16")]; tensor var_10243_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1689_cast_fp16)[name = tensor("op_10243_cast_fp16")]; tensor var_10244_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1691_cast_fp16)[name = tensor("op_10244_cast_fp16")]; tensor var_10245_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1693_cast_fp16)[name = tensor("op_10245_cast_fp16")]; tensor var_10246_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1695_cast_fp16)[name = tensor("op_10246_cast_fp16")]; tensor var_10247_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1697_cast_fp16)[name = tensor("op_10247_cast_fp16")]; tensor var_10248_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1699_cast_fp16)[name = tensor("op_10248_cast_fp16")]; tensor var_10249_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1701_cast_fp16)[name = tensor("op_10249_cast_fp16")]; tensor var_10250_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1703_cast_fp16)[name = tensor("op_10250_cast_fp16")]; tensor var_10251_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1705_cast_fp16)[name = tensor("op_10251_cast_fp16")]; tensor var_10252_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1707_cast_fp16)[name = tensor("op_10252_cast_fp16")]; tensor var_10253_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1709_cast_fp16)[name = tensor("op_10253_cast_fp16")]; tensor var_10254_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1711_cast_fp16)[name = tensor("op_10254_cast_fp16")]; tensor var_10255_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1713_cast_fp16)[name = tensor("op_10255_cast_fp16")]; tensor var_10256_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1715_cast_fp16)[name = tensor("op_10256_cast_fp16")]; tensor var_10257_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1717_cast_fp16)[name = tensor("op_10257_cast_fp16")]; tensor var_10258_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1719_cast_fp16)[name = tensor("op_10258_cast_fp16")]; tensor var_10259_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1721_cast_fp16)[name = tensor("op_10259_cast_fp16")]; tensor var_10260_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1723_cast_fp16)[name = tensor("op_10260_cast_fp16")]; tensor var_10261_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_1725_cast_fp16)[name = tensor("op_10261_cast_fp16")]; tensor var_10262_cast_fp16 = softmax(axis = var_9635, x = aw_chunk_cast_fp16)[name = tensor("op_10262_cast_fp16")]; tensor var_10264_equation_0 = const()[name = tensor("op_10264_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10264_cast_fp16 = einsum(equation = var_10264_equation_0, values = (var_9856_cast_fp16, var_10191_cast_fp16))[name = tensor("op_10264_cast_fp16")]; tensor var_10266_equation_0 = const()[name = tensor("op_10266_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10266_cast_fp16 = einsum(equation = var_10266_equation_0, values = (var_9856_cast_fp16, var_10192_cast_fp16))[name = tensor("op_10266_cast_fp16")]; tensor var_10268_equation_0 = const()[name = tensor("op_10268_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10268_cast_fp16 = einsum(equation = var_10268_equation_0, values = (var_9856_cast_fp16, var_10193_cast_fp16))[name = tensor("op_10268_cast_fp16")]; tensor var_10270_equation_0 = const()[name = tensor("op_10270_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10270_cast_fp16 = einsum(equation = var_10270_equation_0, values = (var_9856_cast_fp16, var_10194_cast_fp16))[name = tensor("op_10270_cast_fp16")]; tensor var_10272_equation_0 = const()[name = tensor("op_10272_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10272_cast_fp16 = einsum(equation = var_10272_equation_0, values = (var_9856_cast_fp16, var_10195_cast_fp16))[name = tensor("op_10272_cast_fp16")]; tensor var_10274_equation_0 = const()[name = tensor("op_10274_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10274_cast_fp16 = einsum(equation = var_10274_equation_0, values = (var_9856_cast_fp16, var_10196_cast_fp16))[name = tensor("op_10274_cast_fp16")]; tensor var_10276_equation_0 = const()[name = tensor("op_10276_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10276_cast_fp16 = einsum(equation = var_10276_equation_0, values = (var_9860_cast_fp16, var_10197_cast_fp16))[name = tensor("op_10276_cast_fp16")]; tensor var_10278_equation_0 = const()[name = tensor("op_10278_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10278_cast_fp16 = einsum(equation = var_10278_equation_0, values = (var_9860_cast_fp16, var_10198_cast_fp16))[name = tensor("op_10278_cast_fp16")]; tensor var_10280_equation_0 = const()[name = tensor("op_10280_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10280_cast_fp16 = einsum(equation = var_10280_equation_0, values = (var_9860_cast_fp16, var_10199_cast_fp16))[name = tensor("op_10280_cast_fp16")]; tensor var_10282_equation_0 = const()[name = tensor("op_10282_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10282_cast_fp16 = einsum(equation = var_10282_equation_0, values = (var_9860_cast_fp16, var_10200_cast_fp16))[name = tensor("op_10282_cast_fp16")]; tensor var_10284_equation_0 = const()[name = tensor("op_10284_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10284_cast_fp16 = einsum(equation = var_10284_equation_0, values = (var_9860_cast_fp16, var_10201_cast_fp16))[name = tensor("op_10284_cast_fp16")]; tensor var_10286_equation_0 = const()[name = tensor("op_10286_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10286_cast_fp16 = einsum(equation = var_10286_equation_0, values = (var_9860_cast_fp16, var_10202_cast_fp16))[name = tensor("op_10286_cast_fp16")]; tensor var_10288_equation_0 = const()[name = tensor("op_10288_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10288_cast_fp16 = einsum(equation = var_10288_equation_0, values = (var_9864_cast_fp16, var_10203_cast_fp16))[name = tensor("op_10288_cast_fp16")]; tensor var_10290_equation_0 = const()[name = tensor("op_10290_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10290_cast_fp16 = einsum(equation = var_10290_equation_0, values = (var_9864_cast_fp16, var_10204_cast_fp16))[name = tensor("op_10290_cast_fp16")]; tensor var_10292_equation_0 = const()[name = tensor("op_10292_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10292_cast_fp16 = einsum(equation = var_10292_equation_0, values = (var_9864_cast_fp16, var_10205_cast_fp16))[name = tensor("op_10292_cast_fp16")]; tensor var_10294_equation_0 = const()[name = tensor("op_10294_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10294_cast_fp16 = einsum(equation = var_10294_equation_0, values = (var_9864_cast_fp16, var_10206_cast_fp16))[name = tensor("op_10294_cast_fp16")]; tensor var_10296_equation_0 = const()[name = tensor("op_10296_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10296_cast_fp16 = einsum(equation = var_10296_equation_0, values = (var_9864_cast_fp16, var_10207_cast_fp16))[name = tensor("op_10296_cast_fp16")]; tensor var_10298_equation_0 = const()[name = tensor("op_10298_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10298_cast_fp16 = einsum(equation = var_10298_equation_0, values = (var_9864_cast_fp16, var_10208_cast_fp16))[name = tensor("op_10298_cast_fp16")]; tensor var_10300_equation_0 = const()[name = tensor("op_10300_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10300_cast_fp16 = einsum(equation = var_10300_equation_0, values = (var_9868_cast_fp16, var_10209_cast_fp16))[name = tensor("op_10300_cast_fp16")]; tensor var_10302_equation_0 = const()[name = tensor("op_10302_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10302_cast_fp16 = einsum(equation = var_10302_equation_0, values = (var_9868_cast_fp16, var_10210_cast_fp16))[name = tensor("op_10302_cast_fp16")]; tensor var_10304_equation_0 = const()[name = tensor("op_10304_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10304_cast_fp16 = einsum(equation = var_10304_equation_0, values = (var_9868_cast_fp16, var_10211_cast_fp16))[name = tensor("op_10304_cast_fp16")]; tensor var_10306_equation_0 = const()[name = tensor("op_10306_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10306_cast_fp16 = einsum(equation = var_10306_equation_0, values = (var_9868_cast_fp16, var_10212_cast_fp16))[name = tensor("op_10306_cast_fp16")]; tensor var_10308_equation_0 = const()[name = tensor("op_10308_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10308_cast_fp16 = einsum(equation = var_10308_equation_0, values = (var_9868_cast_fp16, var_10213_cast_fp16))[name = tensor("op_10308_cast_fp16")]; tensor var_10310_equation_0 = const()[name = tensor("op_10310_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10310_cast_fp16 = einsum(equation = var_10310_equation_0, values = (var_9868_cast_fp16, var_10214_cast_fp16))[name = tensor("op_10310_cast_fp16")]; tensor var_10312_equation_0 = const()[name = tensor("op_10312_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10312_cast_fp16 = einsum(equation = var_10312_equation_0, values = (var_9872_cast_fp16, var_10215_cast_fp16))[name = tensor("op_10312_cast_fp16")]; tensor var_10314_equation_0 = const()[name = tensor("op_10314_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10314_cast_fp16 = einsum(equation = var_10314_equation_0, values = (var_9872_cast_fp16, var_10216_cast_fp16))[name = tensor("op_10314_cast_fp16")]; tensor var_10316_equation_0 = const()[name = tensor("op_10316_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10316_cast_fp16 = einsum(equation = var_10316_equation_0, values = (var_9872_cast_fp16, var_10217_cast_fp16))[name = tensor("op_10316_cast_fp16")]; tensor var_10318_equation_0 = const()[name = tensor("op_10318_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10318_cast_fp16 = einsum(equation = var_10318_equation_0, values = (var_9872_cast_fp16, var_10218_cast_fp16))[name = tensor("op_10318_cast_fp16")]; tensor var_10320_equation_0 = const()[name = tensor("op_10320_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10320_cast_fp16 = einsum(equation = var_10320_equation_0, values = (var_9872_cast_fp16, var_10219_cast_fp16))[name = tensor("op_10320_cast_fp16")]; tensor var_10322_equation_0 = const()[name = tensor("op_10322_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10322_cast_fp16 = einsum(equation = var_10322_equation_0, values = (var_9872_cast_fp16, var_10220_cast_fp16))[name = tensor("op_10322_cast_fp16")]; tensor var_10324_equation_0 = const()[name = tensor("op_10324_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10324_cast_fp16 = einsum(equation = var_10324_equation_0, values = (var_9876_cast_fp16, var_10221_cast_fp16))[name = tensor("op_10324_cast_fp16")]; tensor var_10326_equation_0 = const()[name = tensor("op_10326_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10326_cast_fp16 = einsum(equation = var_10326_equation_0, values = (var_9876_cast_fp16, var_10222_cast_fp16))[name = tensor("op_10326_cast_fp16")]; tensor var_10328_equation_0 = const()[name = tensor("op_10328_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10328_cast_fp16 = einsum(equation = var_10328_equation_0, values = (var_9876_cast_fp16, var_10223_cast_fp16))[name = tensor("op_10328_cast_fp16")]; tensor var_10330_equation_0 = const()[name = tensor("op_10330_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10330_cast_fp16 = einsum(equation = var_10330_equation_0, values = (var_9876_cast_fp16, var_10224_cast_fp16))[name = tensor("op_10330_cast_fp16")]; tensor var_10332_equation_0 = const()[name = tensor("op_10332_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10332_cast_fp16 = einsum(equation = var_10332_equation_0, values = (var_9876_cast_fp16, var_10225_cast_fp16))[name = tensor("op_10332_cast_fp16")]; tensor var_10334_equation_0 = const()[name = tensor("op_10334_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10334_cast_fp16 = einsum(equation = var_10334_equation_0, values = (var_9876_cast_fp16, var_10226_cast_fp16))[name = tensor("op_10334_cast_fp16")]; tensor var_10336_equation_0 = const()[name = tensor("op_10336_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10336_cast_fp16 = einsum(equation = var_10336_equation_0, values = (var_9880_cast_fp16, var_10227_cast_fp16))[name = tensor("op_10336_cast_fp16")]; tensor var_10338_equation_0 = const()[name = tensor("op_10338_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10338_cast_fp16 = einsum(equation = var_10338_equation_0, values = (var_9880_cast_fp16, var_10228_cast_fp16))[name = tensor("op_10338_cast_fp16")]; tensor var_10340_equation_0 = const()[name = tensor("op_10340_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10340_cast_fp16 = einsum(equation = var_10340_equation_0, values = (var_9880_cast_fp16, var_10229_cast_fp16))[name = tensor("op_10340_cast_fp16")]; tensor var_10342_equation_0 = const()[name = tensor("op_10342_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10342_cast_fp16 = einsum(equation = var_10342_equation_0, values = (var_9880_cast_fp16, var_10230_cast_fp16))[name = tensor("op_10342_cast_fp16")]; tensor var_10344_equation_0 = const()[name = tensor("op_10344_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10344_cast_fp16 = einsum(equation = var_10344_equation_0, values = (var_9880_cast_fp16, var_10231_cast_fp16))[name = tensor("op_10344_cast_fp16")]; tensor var_10346_equation_0 = const()[name = tensor("op_10346_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10346_cast_fp16 = einsum(equation = var_10346_equation_0, values = (var_9880_cast_fp16, var_10232_cast_fp16))[name = tensor("op_10346_cast_fp16")]; tensor var_10348_equation_0 = const()[name = tensor("op_10348_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10348_cast_fp16 = einsum(equation = var_10348_equation_0, values = (var_9884_cast_fp16, var_10233_cast_fp16))[name = tensor("op_10348_cast_fp16")]; tensor var_10350_equation_0 = const()[name = tensor("op_10350_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10350_cast_fp16 = einsum(equation = var_10350_equation_0, values = (var_9884_cast_fp16, var_10234_cast_fp16))[name = tensor("op_10350_cast_fp16")]; tensor var_10352_equation_0 = const()[name = tensor("op_10352_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10352_cast_fp16 = einsum(equation = var_10352_equation_0, values = (var_9884_cast_fp16, var_10235_cast_fp16))[name = tensor("op_10352_cast_fp16")]; tensor var_10354_equation_0 = const()[name = tensor("op_10354_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10354_cast_fp16 = einsum(equation = var_10354_equation_0, values = (var_9884_cast_fp16, var_10236_cast_fp16))[name = tensor("op_10354_cast_fp16")]; tensor var_10356_equation_0 = const()[name = tensor("op_10356_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10356_cast_fp16 = einsum(equation = var_10356_equation_0, values = (var_9884_cast_fp16, var_10237_cast_fp16))[name = tensor("op_10356_cast_fp16")]; tensor var_10358_equation_0 = const()[name = tensor("op_10358_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10358_cast_fp16 = einsum(equation = var_10358_equation_0, values = (var_9884_cast_fp16, var_10238_cast_fp16))[name = tensor("op_10358_cast_fp16")]; tensor var_10360_equation_0 = const()[name = tensor("op_10360_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10360_cast_fp16 = einsum(equation = var_10360_equation_0, values = (var_9888_cast_fp16, var_10239_cast_fp16))[name = tensor("op_10360_cast_fp16")]; tensor var_10362_equation_0 = const()[name = tensor("op_10362_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10362_cast_fp16 = einsum(equation = var_10362_equation_0, values = (var_9888_cast_fp16, var_10240_cast_fp16))[name = tensor("op_10362_cast_fp16")]; tensor var_10364_equation_0 = const()[name = tensor("op_10364_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10364_cast_fp16 = einsum(equation = var_10364_equation_0, values = (var_9888_cast_fp16, var_10241_cast_fp16))[name = tensor("op_10364_cast_fp16")]; tensor var_10366_equation_0 = const()[name = tensor("op_10366_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10366_cast_fp16 = einsum(equation = var_10366_equation_0, values = (var_9888_cast_fp16, var_10242_cast_fp16))[name = tensor("op_10366_cast_fp16")]; tensor var_10368_equation_0 = const()[name = tensor("op_10368_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10368_cast_fp16 = einsum(equation = var_10368_equation_0, values = (var_9888_cast_fp16, var_10243_cast_fp16))[name = tensor("op_10368_cast_fp16")]; tensor var_10370_equation_0 = const()[name = tensor("op_10370_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10370_cast_fp16 = einsum(equation = var_10370_equation_0, values = (var_9888_cast_fp16, var_10244_cast_fp16))[name = tensor("op_10370_cast_fp16")]; tensor var_10372_equation_0 = const()[name = tensor("op_10372_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10372_cast_fp16 = einsum(equation = var_10372_equation_0, values = (var_9892_cast_fp16, var_10245_cast_fp16))[name = tensor("op_10372_cast_fp16")]; tensor var_10374_equation_0 = const()[name = tensor("op_10374_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10374_cast_fp16 = einsum(equation = var_10374_equation_0, values = (var_9892_cast_fp16, var_10246_cast_fp16))[name = tensor("op_10374_cast_fp16")]; tensor var_10376_equation_0 = const()[name = tensor("op_10376_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10376_cast_fp16 = einsum(equation = var_10376_equation_0, values = (var_9892_cast_fp16, var_10247_cast_fp16))[name = tensor("op_10376_cast_fp16")]; tensor var_10378_equation_0 = const()[name = tensor("op_10378_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10378_cast_fp16 = einsum(equation = var_10378_equation_0, values = (var_9892_cast_fp16, var_10248_cast_fp16))[name = tensor("op_10378_cast_fp16")]; tensor var_10380_equation_0 = const()[name = tensor("op_10380_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10380_cast_fp16 = einsum(equation = var_10380_equation_0, values = (var_9892_cast_fp16, var_10249_cast_fp16))[name = tensor("op_10380_cast_fp16")]; tensor var_10382_equation_0 = const()[name = tensor("op_10382_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10382_cast_fp16 = einsum(equation = var_10382_equation_0, values = (var_9892_cast_fp16, var_10250_cast_fp16))[name = tensor("op_10382_cast_fp16")]; tensor var_10384_equation_0 = const()[name = tensor("op_10384_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10384_cast_fp16 = einsum(equation = var_10384_equation_0, values = (var_9896_cast_fp16, var_10251_cast_fp16))[name = tensor("op_10384_cast_fp16")]; tensor var_10386_equation_0 = const()[name = tensor("op_10386_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10386_cast_fp16 = einsum(equation = var_10386_equation_0, values = (var_9896_cast_fp16, var_10252_cast_fp16))[name = tensor("op_10386_cast_fp16")]; tensor var_10388_equation_0 = const()[name = tensor("op_10388_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10388_cast_fp16 = einsum(equation = var_10388_equation_0, values = (var_9896_cast_fp16, var_10253_cast_fp16))[name = tensor("op_10388_cast_fp16")]; tensor var_10390_equation_0 = const()[name = tensor("op_10390_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10390_cast_fp16 = einsum(equation = var_10390_equation_0, values = (var_9896_cast_fp16, var_10254_cast_fp16))[name = tensor("op_10390_cast_fp16")]; tensor var_10392_equation_0 = const()[name = tensor("op_10392_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10392_cast_fp16 = einsum(equation = var_10392_equation_0, values = (var_9896_cast_fp16, var_10255_cast_fp16))[name = tensor("op_10392_cast_fp16")]; tensor var_10394_equation_0 = const()[name = tensor("op_10394_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10394_cast_fp16 = einsum(equation = var_10394_equation_0, values = (var_9896_cast_fp16, var_10256_cast_fp16))[name = tensor("op_10394_cast_fp16")]; tensor var_10396_equation_0 = const()[name = tensor("op_10396_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10396_cast_fp16 = einsum(equation = var_10396_equation_0, values = (var_9900_cast_fp16, var_10257_cast_fp16))[name = tensor("op_10396_cast_fp16")]; tensor var_10398_equation_0 = const()[name = tensor("op_10398_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10398_cast_fp16 = einsum(equation = var_10398_equation_0, values = (var_9900_cast_fp16, var_10258_cast_fp16))[name = tensor("op_10398_cast_fp16")]; tensor var_10400_equation_0 = const()[name = tensor("op_10400_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10400_cast_fp16 = einsum(equation = var_10400_equation_0, values = (var_9900_cast_fp16, var_10259_cast_fp16))[name = tensor("op_10400_cast_fp16")]; tensor var_10402_equation_0 = const()[name = tensor("op_10402_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10402_cast_fp16 = einsum(equation = var_10402_equation_0, values = (var_9900_cast_fp16, var_10260_cast_fp16))[name = tensor("op_10402_cast_fp16")]; tensor var_10404_equation_0 = const()[name = tensor("op_10404_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10404_cast_fp16 = einsum(equation = var_10404_equation_0, values = (var_9900_cast_fp16, var_10261_cast_fp16))[name = tensor("op_10404_cast_fp16")]; tensor var_10406_equation_0 = const()[name = tensor("op_10406_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10406_cast_fp16 = einsum(equation = var_10406_equation_0, values = (var_9900_cast_fp16, var_10262_cast_fp16))[name = tensor("op_10406_cast_fp16")]; tensor var_10408_interleave_0 = const()[name = tensor("op_10408_interleave_0"), val = tensor(false)]; tensor var_10408_cast_fp16 = concat(axis = var_9619, interleave = var_10408_interleave_0, values = (var_10264_cast_fp16, var_10266_cast_fp16, var_10268_cast_fp16, var_10270_cast_fp16, var_10272_cast_fp16, var_10274_cast_fp16))[name = tensor("op_10408_cast_fp16")]; tensor var_10410_interleave_0 = const()[name = tensor("op_10410_interleave_0"), val = tensor(false)]; tensor var_10410_cast_fp16 = concat(axis = var_9619, interleave = var_10410_interleave_0, values = (var_10276_cast_fp16, var_10278_cast_fp16, var_10280_cast_fp16, var_10282_cast_fp16, var_10284_cast_fp16, var_10286_cast_fp16))[name = tensor("op_10410_cast_fp16")]; tensor var_10412_interleave_0 = const()[name = tensor("op_10412_interleave_0"), val = tensor(false)]; tensor var_10412_cast_fp16 = concat(axis = var_9619, interleave = var_10412_interleave_0, values = (var_10288_cast_fp16, var_10290_cast_fp16, var_10292_cast_fp16, var_10294_cast_fp16, var_10296_cast_fp16, var_10298_cast_fp16))[name = tensor("op_10412_cast_fp16")]; tensor var_10414_interleave_0 = const()[name = tensor("op_10414_interleave_0"), val = tensor(false)]; tensor var_10414_cast_fp16 = concat(axis = var_9619, interleave = var_10414_interleave_0, values = (var_10300_cast_fp16, var_10302_cast_fp16, var_10304_cast_fp16, var_10306_cast_fp16, var_10308_cast_fp16, var_10310_cast_fp16))[name = tensor("op_10414_cast_fp16")]; tensor var_10416_interleave_0 = const()[name = tensor("op_10416_interleave_0"), val = tensor(false)]; tensor var_10416_cast_fp16 = concat(axis = var_9619, interleave = var_10416_interleave_0, values = (var_10312_cast_fp16, var_10314_cast_fp16, var_10316_cast_fp16, var_10318_cast_fp16, var_10320_cast_fp16, var_10322_cast_fp16))[name = tensor("op_10416_cast_fp16")]; tensor var_10418_interleave_0 = const()[name = tensor("op_10418_interleave_0"), val = tensor(false)]; tensor var_10418_cast_fp16 = concat(axis = var_9619, interleave = var_10418_interleave_0, values = (var_10324_cast_fp16, var_10326_cast_fp16, var_10328_cast_fp16, var_10330_cast_fp16, var_10332_cast_fp16, var_10334_cast_fp16))[name = tensor("op_10418_cast_fp16")]; tensor var_10420_interleave_0 = const()[name = tensor("op_10420_interleave_0"), val = tensor(false)]; tensor var_10420_cast_fp16 = concat(axis = var_9619, interleave = var_10420_interleave_0, values = (var_10336_cast_fp16, var_10338_cast_fp16, var_10340_cast_fp16, var_10342_cast_fp16, var_10344_cast_fp16, var_10346_cast_fp16))[name = tensor("op_10420_cast_fp16")]; tensor var_10422_interleave_0 = const()[name = tensor("op_10422_interleave_0"), val = tensor(false)]; tensor var_10422_cast_fp16 = concat(axis = var_9619, interleave = var_10422_interleave_0, values = (var_10348_cast_fp16, var_10350_cast_fp16, var_10352_cast_fp16, var_10354_cast_fp16, var_10356_cast_fp16, var_10358_cast_fp16))[name = tensor("op_10422_cast_fp16")]; tensor var_10424_interleave_0 = const()[name = tensor("op_10424_interleave_0"), val = tensor(false)]; tensor var_10424_cast_fp16 = concat(axis = var_9619, interleave = var_10424_interleave_0, values = (var_10360_cast_fp16, var_10362_cast_fp16, var_10364_cast_fp16, var_10366_cast_fp16, var_10368_cast_fp16, var_10370_cast_fp16))[name = tensor("op_10424_cast_fp16")]; tensor var_10426_interleave_0 = const()[name = tensor("op_10426_interleave_0"), val = tensor(false)]; tensor var_10426_cast_fp16 = concat(axis = var_9619, interleave = var_10426_interleave_0, values = (var_10372_cast_fp16, var_10374_cast_fp16, var_10376_cast_fp16, var_10378_cast_fp16, var_10380_cast_fp16, var_10382_cast_fp16))[name = tensor("op_10426_cast_fp16")]; tensor var_10428_interleave_0 = const()[name = tensor("op_10428_interleave_0"), val = tensor(false)]; tensor var_10428_cast_fp16 = concat(axis = var_9619, interleave = var_10428_interleave_0, values = (var_10384_cast_fp16, var_10386_cast_fp16, var_10388_cast_fp16, var_10390_cast_fp16, var_10392_cast_fp16, var_10394_cast_fp16))[name = tensor("op_10428_cast_fp16")]; tensor var_10430_interleave_0 = const()[name = tensor("op_10430_interleave_0"), val = tensor(false)]; tensor var_10430_cast_fp16 = concat(axis = var_9619, interleave = var_10430_interleave_0, values = (var_10396_cast_fp16, var_10398_cast_fp16, var_10400_cast_fp16, var_10402_cast_fp16, var_10404_cast_fp16, var_10406_cast_fp16))[name = tensor("op_10430_cast_fp16")]; tensor input_89_interleave_0 = const()[name = tensor("input_89_interleave_0"), val = tensor(false)]; tensor input_89_cast_fp16 = concat(axis = var_9635, interleave = input_89_interleave_0, values = (var_10408_cast_fp16, var_10410_cast_fp16, var_10412_cast_fp16, var_10414_cast_fp16, var_10416_cast_fp16, var_10418_cast_fp16, var_10420_cast_fp16, var_10422_cast_fp16, var_10424_cast_fp16, var_10426_cast_fp16, var_10428_cast_fp16, var_10430_cast_fp16))[name = tensor("input_89_cast_fp16")]; tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("valid")]; tensor obj_strides_0 = const()[name = tensor("obj_strides_0"), val = tensor([1, 1])]; tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_dilations_0 = const()[name = tensor("obj_dilations_0"), val = tensor([1, 1])]; tensor obj_groups_0 = const()[name = tensor("obj_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165690624)))]; tensor layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166870336)))]; tensor obj_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("obj_cast_fp16")]; tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; tensor var_10449_to_fp16 = const()[name = tensor("op_10449_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_10449_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166871936)))]; tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166873536)))]; tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor input_93_pad_type_0 = const()[name = tensor("input_93_pad_type_0"), val = tensor("valid")]; tensor input_93_strides_0 = const()[name = tensor("input_93_strides_0"), val = tensor([1, 1])]; tensor input_93_pad_0 = const()[name = tensor("input_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_93_dilations_0 = const()[name = tensor("input_93_dilations_0"), val = tensor([1, 1])]; tensor input_93_groups_0 = const()[name = tensor("input_93_groups_0"), val = tensor(1)]; tensor layers_11_fc1_weight_to_fp16 = const()[name = tensor("layers_11_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166875136)))]; tensor layers_11_fc1_bias_to_fp16 = const()[name = tensor("layers_11_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171593792)))]; tensor input_93_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = input_93_dilations_0, groups = input_93_groups_0, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = input_93_strides_0, weight = layers_11_fc1_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_93_cast_fp16)[name = tensor("input_cast_fp16")]; tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("valid")]; tensor hidden_states_strides_0 = const()[name = tensor("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = tensor("hidden_states_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_groups_0 = const()[name = tensor("hidden_states_groups_0"), val = tensor(1)]; tensor layers_11_fc2_weight_to_fp16 = const()[name = tensor("layers_11_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171600000)))]; tensor layers_11_fc2_bias_to_fp16 = const()[name = tensor("layers_11_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176318656)))]; tensor hidden_states_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = layers_11_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; tensor inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; tensor var_10487_to_fp16 = const()[name = tensor("op_10487_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_10487_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176320256)))]; tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176321856)))]; tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; } -> (encoder_output_embeds); }