program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})] { func main(tensor melspectrogram_features) { tensor var_50_pad_type_0 = const()[name = tensor("op_50_pad_type_0"), val = tensor("custom")]; tensor var_50_pad_0 = const()[name = tensor("op_50_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_50_strides_0 = const()[name = tensor("op_50_strides_0"), val = tensor([1, 1])]; tensor var_50_dilations_0 = const()[name = tensor("op_50_dilations_0"), val = tensor([1, 1])]; tensor var_50_groups_0 = const()[name = tensor("op_50_groups_0"), val = tensor(1)]; tensor var_25_to_fp16 = const()[name = tensor("op_25_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor var_31_to_fp16 = const()[name = tensor("op_31_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184448)))]; tensor var_50_cast_fp16 = conv(bias = var_31_to_fp16, dilations = var_50_dilations_0, groups = var_50_groups_0, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_50_strides_0, weight = var_25_to_fp16, x = melspectrogram_features)[name = tensor("op_50_cast_fp16")]; tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_50_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; tensor var_90_pad_type_0 = const()[name = tensor("op_90_pad_type_0"), val = tensor("custom")]; tensor var_90_pad_0 = const()[name = tensor("op_90_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_90_strides_0 = const()[name = tensor("op_90_strides_0"), val = tensor([2, 2])]; tensor var_90_dilations_0 = const()[name = tensor("op_90_dilations_0"), val = tensor([1, 1])]; tensor var_90_groups_0 = const()[name = tensor("op_90_groups_0"), val = tensor(1)]; tensor var_65_to_fp16 = const()[name = tensor("op_65_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185280)))]; tensor var_71_to_fp16 = const()[name = tensor("op_71_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1070080)))]; tensor var_90_cast_fp16 = conv(bias = var_71_to_fp16, dilations = var_90_dilations_0, groups = var_90_groups_0, pad = var_90_pad_0, pad_type = var_90_pad_type_0, strides = var_90_strides_0, weight = var_65_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_90_cast_fp16")]; tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_90_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; tensor var_108_to_fp16 = const()[name = tensor("op_108_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1070912)))]; tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_108_to_fp16)[name = tensor("inputs_1_cast_fp16")]; tensor var_121 = const()[name = tensor("op_121"), val = tensor(3)]; tensor var_133 = const()[name = tensor("op_133"), val = tensor(1)]; tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; tensor var_150_to_fp16 = const()[name = tensor("op_150_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_150_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2222976)))]; tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2223808)))]; tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2224640)))]; tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2225472)))]; tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("valid")]; tensor query_1_strides_0 = const()[name = tensor("query_1_strides_0"), val = tensor([1, 1])]; tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_1_dilations_0 = const()[name = tensor("query_1_dilations_0"), val = tensor([1, 1])]; tensor query_1_groups_0 = const()[name = tensor("query_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2226304)))]; tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2521280)))]; tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = query_1_dilations_0, groups = query_1_groups_0, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = query_1_strides_0, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("valid")]; tensor key_1_strides_0 = const()[name = tensor("key_1_strides_0"), val = tensor([1, 1])]; tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_1_dilations_0 = const()[name = tensor("key_1_dilations_0"), val = tensor([1, 1])]; tensor key_1_groups_0 = const()[name = tensor("key_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2522112)))]; tensor key_1_cast_fp16 = conv(dilations = key_1_dilations_0, groups = key_1_groups_0, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = key_1_strides_0, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("valid")]; tensor value_1_strides_0 = const()[name = tensor("value_1_strides_0"), val = tensor([1, 1])]; tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_1_dilations_0 = const()[name = tensor("value_1_dilations_0"), val = tensor([1, 1])]; tensor value_1_groups_0 = const()[name = tensor("value_1_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2817088)))]; tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3112064)))]; tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = value_1_dilations_0, groups = value_1_groups_0, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = value_1_strides_0, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; tensor var_185_begin_0 = const()[name = tensor("op_185_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_185_end_0 = const()[name = tensor("op_185_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_185_end_mask_0 = const()[name = tensor("op_185_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_185_cast_fp16 = slice_by_index(begin = var_185_begin_0, end = var_185_end_0, end_mask = var_185_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_185_cast_fp16")]; tensor var_189_begin_0 = const()[name = tensor("op_189_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_189_end_0 = const()[name = tensor("op_189_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_189_end_mask_0 = const()[name = tensor("op_189_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_189_cast_fp16 = slice_by_index(begin = var_189_begin_0, end = var_189_end_0, end_mask = var_189_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_189_cast_fp16")]; tensor var_193_begin_0 = const()[name = tensor("op_193_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_193_end_0 = const()[name = tensor("op_193_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_193_end_mask_0 = const()[name = tensor("op_193_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_193_cast_fp16 = slice_by_index(begin = var_193_begin_0, end = var_193_end_0, end_mask = var_193_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_193_cast_fp16")]; tensor var_197_begin_0 = const()[name = tensor("op_197_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_197_end_0 = const()[name = tensor("op_197_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_197_end_mask_0 = const()[name = tensor("op_197_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_197_cast_fp16 = slice_by_index(begin = var_197_begin_0, end = var_197_end_0, end_mask = var_197_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_197_cast_fp16")]; tensor var_201_begin_0 = const()[name = tensor("op_201_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_201_end_0 = const()[name = tensor("op_201_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_201_end_mask_0 = const()[name = tensor("op_201_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_201_cast_fp16 = slice_by_index(begin = var_201_begin_0, end = var_201_end_0, end_mask = var_201_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_201_cast_fp16")]; tensor var_205_begin_0 = const()[name = tensor("op_205_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_205_end_0 = const()[name = tensor("op_205_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_205_end_mask_0 = const()[name = tensor("op_205_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_205_cast_fp16 = slice_by_index(begin = var_205_begin_0, end = var_205_end_0, end_mask = var_205_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_205_cast_fp16")]; tensor var_208_begin_0 = const()[name = tensor("op_208_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_208_end_0 = const()[name = tensor("op_208_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_208_end_mask_0 = const()[name = tensor("op_208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_208_cast_fp16 = slice_by_index(begin = var_208_begin_0, end = var_208_end_0, end_mask = var_208_end_mask_0, x = var_185_cast_fp16)[name = tensor("op_208_cast_fp16")]; tensor var_209_begin_0 = const()[name = tensor("op_209_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_209_end_0 = const()[name = tensor("op_209_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_209_end_mask_0 = const()[name = tensor("op_209_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_209_cast_fp16 = slice_by_index(begin = var_209_begin_0, end = var_209_end_0, end_mask = var_209_end_mask_0, x = var_185_cast_fp16)[name = tensor("op_209_cast_fp16")]; tensor var_210_begin_0 = const()[name = tensor("op_210_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_210_end_0 = const()[name = tensor("op_210_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_210_end_mask_0 = const()[name = tensor("op_210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_210_cast_fp16 = slice_by_index(begin = var_210_begin_0, end = var_210_end_0, end_mask = var_210_end_mask_0, x = var_185_cast_fp16)[name = tensor("op_210_cast_fp16")]; tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = var_185_cast_fp16)[name = tensor("op_211_cast_fp16")]; tensor var_212_begin_0 = const()[name = tensor("op_212_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_212_end_0 = const()[name = tensor("op_212_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_212_end_mask_0 = const()[name = tensor("op_212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_212_cast_fp16 = slice_by_index(begin = var_212_begin_0, end = var_212_end_0, end_mask = var_212_end_mask_0, x = var_185_cast_fp16)[name = tensor("op_212_cast_fp16")]; tensor var_213_begin_0 = const()[name = tensor("op_213_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_213_end_0 = const()[name = tensor("op_213_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_213_end_mask_0 = const()[name = tensor("op_213_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_213_cast_fp16 = slice_by_index(begin = var_213_begin_0, end = var_213_end_0, end_mask = var_213_end_mask_0, x = var_185_cast_fp16)[name = tensor("op_213_cast_fp16")]; tensor var_214_begin_0 = const()[name = tensor("op_214_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_214_end_0 = const()[name = tensor("op_214_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_214_end_mask_0 = const()[name = tensor("op_214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_214_cast_fp16 = slice_by_index(begin = var_214_begin_0, end = var_214_end_0, end_mask = var_214_end_mask_0, x = var_189_cast_fp16)[name = tensor("op_214_cast_fp16")]; tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = var_189_cast_fp16)[name = tensor("op_215_cast_fp16")]; tensor var_216_begin_0 = const()[name = tensor("op_216_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_216_end_0 = const()[name = tensor("op_216_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_216_end_mask_0 = const()[name = tensor("op_216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_216_cast_fp16 = slice_by_index(begin = var_216_begin_0, end = var_216_end_0, end_mask = var_216_end_mask_0, x = var_189_cast_fp16)[name = tensor("op_216_cast_fp16")]; tensor var_217_begin_0 = const()[name = tensor("op_217_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_217_end_0 = const()[name = tensor("op_217_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_217_end_mask_0 = const()[name = tensor("op_217_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_217_cast_fp16 = slice_by_index(begin = var_217_begin_0, end = var_217_end_0, end_mask = var_217_end_mask_0, x = var_189_cast_fp16)[name = tensor("op_217_cast_fp16")]; tensor var_218_begin_0 = const()[name = tensor("op_218_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_218_end_0 = const()[name = tensor("op_218_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_218_end_mask_0 = const()[name = tensor("op_218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_218_cast_fp16 = slice_by_index(begin = var_218_begin_0, end = var_218_end_0, end_mask = var_218_end_mask_0, x = var_189_cast_fp16)[name = tensor("op_218_cast_fp16")]; tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = var_189_cast_fp16)[name = tensor("op_219_cast_fp16")]; tensor var_220_begin_0 = const()[name = tensor("op_220_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_220_end_0 = const()[name = tensor("op_220_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_220_end_mask_0 = const()[name = tensor("op_220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_220_cast_fp16 = slice_by_index(begin = var_220_begin_0, end = var_220_end_0, end_mask = var_220_end_mask_0, x = var_193_cast_fp16)[name = tensor("op_220_cast_fp16")]; tensor var_221_begin_0 = const()[name = tensor("op_221_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_221_end_0 = const()[name = tensor("op_221_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_221_end_mask_0 = const()[name = tensor("op_221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_221_cast_fp16 = slice_by_index(begin = var_221_begin_0, end = var_221_end_0, end_mask = var_221_end_mask_0, x = var_193_cast_fp16)[name = tensor("op_221_cast_fp16")]; tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = var_193_cast_fp16)[name = tensor("op_222_cast_fp16")]; tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = var_193_cast_fp16)[name = tensor("op_223_cast_fp16")]; tensor var_224_begin_0 = const()[name = tensor("op_224_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_224_end_0 = const()[name = tensor("op_224_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_224_end_mask_0 = const()[name = tensor("op_224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_224_cast_fp16 = slice_by_index(begin = var_224_begin_0, end = var_224_end_0, end_mask = var_224_end_mask_0, x = var_193_cast_fp16)[name = tensor("op_224_cast_fp16")]; tensor var_225_begin_0 = const()[name = tensor("op_225_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_225_end_0 = const()[name = tensor("op_225_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_225_end_mask_0 = const()[name = tensor("op_225_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_225_cast_fp16 = slice_by_index(begin = var_225_begin_0, end = var_225_end_0, end_mask = var_225_end_mask_0, x = var_193_cast_fp16)[name = tensor("op_225_cast_fp16")]; tensor var_226_begin_0 = const()[name = tensor("op_226_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_226_end_0 = const()[name = tensor("op_226_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_226_end_mask_0 = const()[name = tensor("op_226_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_226_cast_fp16 = slice_by_index(begin = var_226_begin_0, end = var_226_end_0, end_mask = var_226_end_mask_0, x = var_197_cast_fp16)[name = tensor("op_226_cast_fp16")]; tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = var_197_cast_fp16)[name = tensor("op_227_cast_fp16")]; tensor var_228_begin_0 = const()[name = tensor("op_228_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_228_end_0 = const()[name = tensor("op_228_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_228_end_mask_0 = const()[name = tensor("op_228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_228_cast_fp16 = slice_by_index(begin = var_228_begin_0, end = var_228_end_0, end_mask = var_228_end_mask_0, x = var_197_cast_fp16)[name = tensor("op_228_cast_fp16")]; tensor var_229_begin_0 = const()[name = tensor("op_229_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_229_end_0 = const()[name = tensor("op_229_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_229_end_mask_0 = const()[name = tensor("op_229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_229_cast_fp16 = slice_by_index(begin = var_229_begin_0, end = var_229_end_0, end_mask = var_229_end_mask_0, x = var_197_cast_fp16)[name = tensor("op_229_cast_fp16")]; tensor var_230_begin_0 = const()[name = tensor("op_230_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_230_end_0 = const()[name = tensor("op_230_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_230_end_mask_0 = const()[name = tensor("op_230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_230_cast_fp16 = slice_by_index(begin = var_230_begin_0, end = var_230_end_0, end_mask = var_230_end_mask_0, x = var_197_cast_fp16)[name = tensor("op_230_cast_fp16")]; tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = var_197_cast_fp16)[name = tensor("op_231_cast_fp16")]; tensor var_232_begin_0 = const()[name = tensor("op_232_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_232_end_0 = const()[name = tensor("op_232_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_232_end_mask_0 = const()[name = tensor("op_232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_232_cast_fp16 = slice_by_index(begin = var_232_begin_0, end = var_232_end_0, end_mask = var_232_end_mask_0, x = var_201_cast_fp16)[name = tensor("op_232_cast_fp16")]; tensor var_233_begin_0 = const()[name = tensor("op_233_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_233_end_0 = const()[name = tensor("op_233_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_233_end_mask_0 = const()[name = tensor("op_233_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_233_cast_fp16 = slice_by_index(begin = var_233_begin_0, end = var_233_end_0, end_mask = var_233_end_mask_0, x = var_201_cast_fp16)[name = tensor("op_233_cast_fp16")]; tensor var_234_begin_0 = const()[name = tensor("op_234_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_234_end_0 = const()[name = tensor("op_234_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_234_end_mask_0 = const()[name = tensor("op_234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_234_cast_fp16 = slice_by_index(begin = var_234_begin_0, end = var_234_end_0, end_mask = var_234_end_mask_0, x = var_201_cast_fp16)[name = tensor("op_234_cast_fp16")]; tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = var_201_cast_fp16)[name = tensor("op_235_cast_fp16")]; tensor var_236_begin_0 = const()[name = tensor("op_236_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_236_end_0 = const()[name = tensor("op_236_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_236_end_mask_0 = const()[name = tensor("op_236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_236_cast_fp16 = slice_by_index(begin = var_236_begin_0, end = var_236_end_0, end_mask = var_236_end_mask_0, x = var_201_cast_fp16)[name = tensor("op_236_cast_fp16")]; tensor var_237_begin_0 = const()[name = tensor("op_237_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_237_end_0 = const()[name = tensor("op_237_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_237_end_mask_0 = const()[name = tensor("op_237_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_237_cast_fp16 = slice_by_index(begin = var_237_begin_0, end = var_237_end_0, end_mask = var_237_end_mask_0, x = var_201_cast_fp16)[name = tensor("op_237_cast_fp16")]; tensor var_238_begin_0 = const()[name = tensor("op_238_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_238_end_0 = const()[name = tensor("op_238_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_238_end_mask_0 = const()[name = tensor("op_238_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_238_cast_fp16 = slice_by_index(begin = var_238_begin_0, end = var_238_end_0, end_mask = var_238_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_238_cast_fp16")]; tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_239_cast_fp16")]; tensor var_240_begin_0 = const()[name = tensor("op_240_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_240_end_0 = const()[name = tensor("op_240_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_240_end_mask_0 = const()[name = tensor("op_240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_240_cast_fp16 = slice_by_index(begin = var_240_begin_0, end = var_240_end_0, end_mask = var_240_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_240_cast_fp16")]; tensor var_241_begin_0 = const()[name = tensor("op_241_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_241_end_0 = const()[name = tensor("op_241_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_241_end_mask_0 = const()[name = tensor("op_241_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_241_cast_fp16 = slice_by_index(begin = var_241_begin_0, end = var_241_end_0, end_mask = var_241_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_241_cast_fp16")]; tensor var_242_begin_0 = const()[name = tensor("op_242_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_242_end_0 = const()[name = tensor("op_242_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_242_end_mask_0 = const()[name = tensor("op_242_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_242_cast_fp16")]; tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = var_205_cast_fp16)[name = tensor("op_243_cast_fp16")]; tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_248_begin_0 = const()[name = tensor("op_248_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_248_end_0 = const()[name = tensor("op_248_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_248_end_mask_0 = const()[name = tensor("op_248_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_3")]; tensor var_248_cast_fp16 = slice_by_index(begin = var_248_begin_0, end = var_248_end_0, end_mask = var_248_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_248_cast_fp16")]; tensor var_252_begin_0 = const()[name = tensor("op_252_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_252_end_0 = const()[name = tensor("op_252_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_252_end_mask_0 = const()[name = tensor("op_252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_252_cast_fp16 = slice_by_index(begin = var_252_begin_0, end = var_252_end_0, end_mask = var_252_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_252_cast_fp16")]; tensor var_256_begin_0 = const()[name = tensor("op_256_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_256_end_0 = const()[name = tensor("op_256_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_256_end_mask_0 = const()[name = tensor("op_256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_256_cast_fp16 = slice_by_index(begin = var_256_begin_0, end = var_256_end_0, end_mask = var_256_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_256_cast_fp16")]; tensor var_260_begin_0 = const()[name = tensor("op_260_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_260_end_0 = const()[name = tensor("op_260_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_260_end_mask_0 = const()[name = tensor("op_260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_260_cast_fp16 = slice_by_index(begin = var_260_begin_0, end = var_260_end_0, end_mask = var_260_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_260_cast_fp16")]; tensor var_264_begin_0 = const()[name = tensor("op_264_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_264_end_0 = const()[name = tensor("op_264_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_264_end_mask_0 = const()[name = tensor("op_264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_264_cast_fp16 = slice_by_index(begin = var_264_begin_0, end = var_264_end_0, end_mask = var_264_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_264_cast_fp16")]; tensor var_268_begin_0 = const()[name = tensor("op_268_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_268_end_0 = const()[name = tensor("op_268_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_268_end_mask_0 = const()[name = tensor("op_268_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_268_cast_fp16 = slice_by_index(begin = var_268_begin_0, end = var_268_end_0, end_mask = var_268_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_268_cast_fp16")]; tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_270_cast_fp16")]; tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_274_cast_fp16")]; tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_278_cast_fp16")]; tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_282_cast_fp16")]; tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_286_cast_fp16")]; tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_290_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1_equation_0, values = (var_248_cast_fp16, var_208_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3_equation_0, values = (var_248_cast_fp16, var_209_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5_equation_0, values = (var_248_cast_fp16, var_210_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5_cast_fp16")]; tensor _SplitHeadsQ__mh_w_7_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_7_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_7_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_7_equation_0, values = (var_248_cast_fp16, var_211_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_7_cast_fp16")]; tensor _SplitHeadsQ__mh_w_9_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_9_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_9_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_9_equation_0, values = (var_248_cast_fp16, var_212_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_9_cast_fp16")]; tensor _SplitHeadsQ__mh_w_11_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_11_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_11_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_11_equation_0, values = (var_248_cast_fp16, var_213_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_11_cast_fp16")]; tensor _SplitHeadsQ__mh_w_13_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_13_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_13_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_13_equation_0, values = (var_252_cast_fp16, var_214_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_13_cast_fp16")]; tensor _SplitHeadsQ__mh_w_15_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_15_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_15_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_15_equation_0, values = (var_252_cast_fp16, var_215_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_15_cast_fp16")]; tensor _SplitHeadsQ__mh_w_17_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_17_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_17_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_17_equation_0, values = (var_252_cast_fp16, var_216_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_17_cast_fp16")]; tensor _SplitHeadsQ__mh_w_19_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_19_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_19_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_19_equation_0, values = (var_252_cast_fp16, var_217_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_19_cast_fp16")]; tensor _SplitHeadsQ__mh_w_21_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_21_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_21_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_21_equation_0, values = (var_252_cast_fp16, var_218_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_21_cast_fp16")]; tensor _SplitHeadsQ__mh_w_23_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_23_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_23_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_23_equation_0, values = (var_252_cast_fp16, var_219_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_23_cast_fp16")]; tensor _SplitHeadsQ__mh_w_25_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_25_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_25_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_25_equation_0, values = (var_256_cast_fp16, var_220_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_25_cast_fp16")]; tensor _SplitHeadsQ__mh_w_27_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_27_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_27_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_27_equation_0, values = (var_256_cast_fp16, var_221_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_27_cast_fp16")]; tensor _SplitHeadsQ__mh_w_29_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_29_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_29_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_29_equation_0, values = (var_256_cast_fp16, var_222_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_29_cast_fp16")]; tensor _SplitHeadsQ__mh_w_31_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_31_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_31_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_31_equation_0, values = (var_256_cast_fp16, var_223_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_31_cast_fp16")]; tensor _SplitHeadsQ__mh_w_33_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_33_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_33_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_33_equation_0, values = (var_256_cast_fp16, var_224_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_33_cast_fp16")]; tensor _SplitHeadsQ__mh_w_35_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_35_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_35_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_35_equation_0, values = (var_256_cast_fp16, var_225_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_35_cast_fp16")]; tensor _SplitHeadsQ__mh_w_37_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_37_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_37_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_37_equation_0, values = (var_260_cast_fp16, var_226_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_37_cast_fp16")]; tensor _SplitHeadsQ__mh_w_39_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_39_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_39_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_39_equation_0, values = (var_260_cast_fp16, var_227_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_39_cast_fp16")]; tensor _SplitHeadsQ__mh_w_41_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_41_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_41_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_41_equation_0, values = (var_260_cast_fp16, var_228_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_41_cast_fp16")]; tensor _SplitHeadsQ__mh_w_43_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_43_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_43_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_43_equation_0, values = (var_260_cast_fp16, var_229_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_43_cast_fp16")]; tensor _SplitHeadsQ__mh_w_45_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_45_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_45_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_45_equation_0, values = (var_260_cast_fp16, var_230_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_45_cast_fp16")]; tensor _SplitHeadsQ__mh_w_47_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_47_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_47_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_47_equation_0, values = (var_260_cast_fp16, var_231_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_47_cast_fp16")]; tensor _SplitHeadsQ__mh_w_49_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_49_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_49_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_49_equation_0, values = (var_264_cast_fp16, var_232_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_49_cast_fp16")]; tensor _SplitHeadsQ__mh_w_51_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_51_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_51_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_51_equation_0, values = (var_264_cast_fp16, var_233_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_51_cast_fp16")]; tensor _SplitHeadsQ__mh_w_53_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_53_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_53_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_53_equation_0, values = (var_264_cast_fp16, var_234_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_53_cast_fp16")]; tensor _SplitHeadsQ__mh_w_55_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_55_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_55_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_55_equation_0, values = (var_264_cast_fp16, var_235_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_55_cast_fp16")]; tensor _SplitHeadsQ__mh_w_57_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_57_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_57_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_57_equation_0, values = (var_264_cast_fp16, var_236_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_57_cast_fp16")]; tensor _SplitHeadsQ__mh_w_59_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_59_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_59_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_59_equation_0, values = (var_264_cast_fp16, var_237_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_59_cast_fp16")]; tensor _SplitHeadsQ__mh_w_61_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_61_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_61_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_61_equation_0, values = (var_268_cast_fp16, var_238_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_61_cast_fp16")]; tensor _SplitHeadsQ__mh_w_63_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_63_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_63_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_63_equation_0, values = (var_268_cast_fp16, var_239_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_63_cast_fp16")]; tensor _SplitHeadsQ__mh_w_65_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_65_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_65_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_65_equation_0, values = (var_268_cast_fp16, var_240_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_65_cast_fp16")]; tensor _SplitHeadsQ__mh_w_67_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_67_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_67_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_67_equation_0, values = (var_268_cast_fp16, var_241_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_67_cast_fp16")]; tensor _SplitHeadsQ__mh_w_69_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_69_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_69_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_69_equation_0, values = (var_268_cast_fp16, var_242_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_69_cast_fp16")]; tensor _SplitHeadsQ__mh_w_71_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_71_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_71_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_71_equation_0, values = (var_268_cast_fp16, var_243_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_71_cast_fp16")]; tensor var_365_to_fp16 = const()[name = tensor("op_365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1_cast_fp16, y = var_365_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; tensor var_367_to_fp16 = const()[name = tensor("op_367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3_cast_fp16, y = var_367_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; tensor var_369_to_fp16 = const()[name = tensor("op_369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5_cast_fp16, y = var_369_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; tensor var_371_to_fp16 = const()[name = tensor("op_371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_7_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_7_cast_fp16, y = var_371_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; tensor var_373_to_fp16 = const()[name = tensor("op_373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_9_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_9_cast_fp16, y = var_373_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; tensor var_375_to_fp16 = const()[name = tensor("op_375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_11_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_11_cast_fp16, y = var_375_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; tensor var_377_to_fp16 = const()[name = tensor("op_377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_13_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_13_cast_fp16, y = var_377_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; tensor var_379_to_fp16 = const()[name = tensor("op_379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_15_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_15_cast_fp16, y = var_379_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; tensor var_381_to_fp16 = const()[name = tensor("op_381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_17_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_17_cast_fp16, y = var_381_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; tensor var_383_to_fp16 = const()[name = tensor("op_383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_19_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_19_cast_fp16, y = var_383_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; tensor var_385_to_fp16 = const()[name = tensor("op_385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_21_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_21_cast_fp16, y = var_385_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; tensor var_387_to_fp16 = const()[name = tensor("op_387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_23_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_23_cast_fp16, y = var_387_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; tensor var_389_to_fp16 = const()[name = tensor("op_389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_25_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_25_cast_fp16, y = var_389_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; tensor var_391_to_fp16 = const()[name = tensor("op_391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_27_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_27_cast_fp16, y = var_391_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; tensor var_393_to_fp16 = const()[name = tensor("op_393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_29_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_29_cast_fp16, y = var_393_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; tensor var_395_to_fp16 = const()[name = tensor("op_395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_31_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_31_cast_fp16, y = var_395_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; tensor var_397_to_fp16 = const()[name = tensor("op_397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_33_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_33_cast_fp16, y = var_397_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; tensor var_399_to_fp16 = const()[name = tensor("op_399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_35_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_35_cast_fp16, y = var_399_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; tensor var_401_to_fp16 = const()[name = tensor("op_401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_37_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_37_cast_fp16, y = var_401_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; tensor var_403_to_fp16 = const()[name = tensor("op_403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_39_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_39_cast_fp16, y = var_403_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; tensor var_405_to_fp16 = const()[name = tensor("op_405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_41_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_41_cast_fp16, y = var_405_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; tensor var_407_to_fp16 = const()[name = tensor("op_407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_43_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_43_cast_fp16, y = var_407_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; tensor var_409_to_fp16 = const()[name = tensor("op_409_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_45_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_45_cast_fp16, y = var_409_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; tensor var_411_to_fp16 = const()[name = tensor("op_411_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_47_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_47_cast_fp16, y = var_411_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; tensor var_413_to_fp16 = const()[name = tensor("op_413_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_49_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_49_cast_fp16, y = var_413_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; tensor var_415_to_fp16 = const()[name = tensor("op_415_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_51_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_51_cast_fp16, y = var_415_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; tensor var_417_to_fp16 = const()[name = tensor("op_417_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_53_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_53_cast_fp16, y = var_417_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; tensor var_419_to_fp16 = const()[name = tensor("op_419_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_55_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_55_cast_fp16, y = var_419_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; tensor var_421_to_fp16 = const()[name = tensor("op_421_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_57_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_57_cast_fp16, y = var_421_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; tensor var_423_to_fp16 = const()[name = tensor("op_423_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_59_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_59_cast_fp16, y = var_423_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; tensor var_425_to_fp16 = const()[name = tensor("op_425_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_61_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_61_cast_fp16, y = var_425_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; tensor var_427_to_fp16 = const()[name = tensor("op_427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_63_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_63_cast_fp16, y = var_427_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; tensor var_429_to_fp16 = const()[name = tensor("op_429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_65_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_65_cast_fp16, y = var_429_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; tensor var_431_to_fp16 = const()[name = tensor("op_431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_67_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_67_cast_fp16, y = var_431_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; tensor var_433_to_fp16 = const()[name = tensor("op_433_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_69_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_69_cast_fp16, y = var_433_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; tensor var_435_to_fp16 = const()[name = tensor("op_435_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_71_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_71_cast_fp16, y = var_435_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; tensor var_437_cast_fp16 = softmax(axis = var_133, x = aw_chunk_1_cast_fp16)[name = tensor("op_437_cast_fp16")]; tensor var_438_cast_fp16 = softmax(axis = var_133, x = aw_chunk_3_cast_fp16)[name = tensor("op_438_cast_fp16")]; tensor var_439_cast_fp16 = softmax(axis = var_133, x = aw_chunk_5_cast_fp16)[name = tensor("op_439_cast_fp16")]; tensor var_440_cast_fp16 = softmax(axis = var_133, x = aw_chunk_7_cast_fp16)[name = tensor("op_440_cast_fp16")]; tensor var_441_cast_fp16 = softmax(axis = var_133, x = aw_chunk_9_cast_fp16)[name = tensor("op_441_cast_fp16")]; tensor var_442_cast_fp16 = softmax(axis = var_133, x = aw_chunk_11_cast_fp16)[name = tensor("op_442_cast_fp16")]; tensor var_443_cast_fp16 = softmax(axis = var_133, x = aw_chunk_13_cast_fp16)[name = tensor("op_443_cast_fp16")]; tensor var_444_cast_fp16 = softmax(axis = var_133, x = aw_chunk_15_cast_fp16)[name = tensor("op_444_cast_fp16")]; tensor var_445_cast_fp16 = softmax(axis = var_133, x = aw_chunk_17_cast_fp16)[name = tensor("op_445_cast_fp16")]; tensor var_446_cast_fp16 = softmax(axis = var_133, x = aw_chunk_19_cast_fp16)[name = tensor("op_446_cast_fp16")]; tensor var_447_cast_fp16 = softmax(axis = var_133, x = aw_chunk_21_cast_fp16)[name = tensor("op_447_cast_fp16")]; tensor var_448_cast_fp16 = softmax(axis = var_133, x = aw_chunk_23_cast_fp16)[name = tensor("op_448_cast_fp16")]; tensor var_449_cast_fp16 = softmax(axis = var_133, x = aw_chunk_25_cast_fp16)[name = tensor("op_449_cast_fp16")]; tensor var_450_cast_fp16 = softmax(axis = var_133, x = aw_chunk_27_cast_fp16)[name = tensor("op_450_cast_fp16")]; tensor var_451_cast_fp16 = softmax(axis = var_133, x = aw_chunk_29_cast_fp16)[name = tensor("op_451_cast_fp16")]; tensor var_452_cast_fp16 = softmax(axis = var_133, x = aw_chunk_31_cast_fp16)[name = tensor("op_452_cast_fp16")]; tensor var_453_cast_fp16 = softmax(axis = var_133, x = aw_chunk_33_cast_fp16)[name = tensor("op_453_cast_fp16")]; tensor var_454_cast_fp16 = softmax(axis = var_133, x = aw_chunk_35_cast_fp16)[name = tensor("op_454_cast_fp16")]; tensor var_455_cast_fp16 = softmax(axis = var_133, x = aw_chunk_37_cast_fp16)[name = tensor("op_455_cast_fp16")]; tensor var_456_cast_fp16 = softmax(axis = var_133, x = aw_chunk_39_cast_fp16)[name = tensor("op_456_cast_fp16")]; tensor var_457_cast_fp16 = softmax(axis = var_133, x = aw_chunk_41_cast_fp16)[name = tensor("op_457_cast_fp16")]; tensor var_458_cast_fp16 = softmax(axis = var_133, x = aw_chunk_43_cast_fp16)[name = tensor("op_458_cast_fp16")]; tensor var_459_cast_fp16 = softmax(axis = var_133, x = aw_chunk_45_cast_fp16)[name = tensor("op_459_cast_fp16")]; tensor var_460_cast_fp16 = softmax(axis = var_133, x = aw_chunk_47_cast_fp16)[name = tensor("op_460_cast_fp16")]; tensor var_461_cast_fp16 = softmax(axis = var_133, x = aw_chunk_49_cast_fp16)[name = tensor("op_461_cast_fp16")]; tensor var_462_cast_fp16 = softmax(axis = var_133, x = aw_chunk_51_cast_fp16)[name = tensor("op_462_cast_fp16")]; tensor var_463_cast_fp16 = softmax(axis = var_133, x = aw_chunk_53_cast_fp16)[name = tensor("op_463_cast_fp16")]; tensor var_464_cast_fp16 = softmax(axis = var_133, x = aw_chunk_55_cast_fp16)[name = tensor("op_464_cast_fp16")]; tensor var_465_cast_fp16 = softmax(axis = var_133, x = aw_chunk_57_cast_fp16)[name = tensor("op_465_cast_fp16")]; tensor var_466_cast_fp16 = softmax(axis = var_133, x = aw_chunk_59_cast_fp16)[name = tensor("op_466_cast_fp16")]; tensor var_467_cast_fp16 = softmax(axis = var_133, x = aw_chunk_61_cast_fp16)[name = tensor("op_467_cast_fp16")]; tensor var_468_cast_fp16 = softmax(axis = var_133, x = aw_chunk_63_cast_fp16)[name = tensor("op_468_cast_fp16")]; tensor var_469_cast_fp16 = softmax(axis = var_133, x = aw_chunk_65_cast_fp16)[name = tensor("op_469_cast_fp16")]; tensor var_470_cast_fp16 = softmax(axis = var_133, x = aw_chunk_67_cast_fp16)[name = tensor("op_470_cast_fp16")]; tensor var_471_cast_fp16 = softmax(axis = var_133, x = aw_chunk_69_cast_fp16)[name = tensor("op_471_cast_fp16")]; tensor var_472_cast_fp16 = softmax(axis = var_133, x = aw_chunk_71_cast_fp16)[name = tensor("op_472_cast_fp16")]; tensor var_474_equation_0 = const()[name = tensor("op_474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_474_cast_fp16 = einsum(equation = var_474_equation_0, values = (var_270_cast_fp16, var_437_cast_fp16))[name = tensor("op_474_cast_fp16")]; tensor var_476_equation_0 = const()[name = tensor("op_476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_476_cast_fp16 = einsum(equation = var_476_equation_0, values = (var_270_cast_fp16, var_438_cast_fp16))[name = tensor("op_476_cast_fp16")]; tensor var_478_equation_0 = const()[name = tensor("op_478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_478_cast_fp16 = einsum(equation = var_478_equation_0, values = (var_270_cast_fp16, var_439_cast_fp16))[name = tensor("op_478_cast_fp16")]; tensor var_480_equation_0 = const()[name = tensor("op_480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_480_cast_fp16 = einsum(equation = var_480_equation_0, values = (var_270_cast_fp16, var_440_cast_fp16))[name = tensor("op_480_cast_fp16")]; tensor var_482_equation_0 = const()[name = tensor("op_482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_482_cast_fp16 = einsum(equation = var_482_equation_0, values = (var_270_cast_fp16, var_441_cast_fp16))[name = tensor("op_482_cast_fp16")]; tensor var_484_equation_0 = const()[name = tensor("op_484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_484_cast_fp16 = einsum(equation = var_484_equation_0, values = (var_270_cast_fp16, var_442_cast_fp16))[name = tensor("op_484_cast_fp16")]; tensor var_486_equation_0 = const()[name = tensor("op_486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_486_cast_fp16 = einsum(equation = var_486_equation_0, values = (var_274_cast_fp16, var_443_cast_fp16))[name = tensor("op_486_cast_fp16")]; tensor var_488_equation_0 = const()[name = tensor("op_488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_488_cast_fp16 = einsum(equation = var_488_equation_0, values = (var_274_cast_fp16, var_444_cast_fp16))[name = tensor("op_488_cast_fp16")]; tensor var_490_equation_0 = const()[name = tensor("op_490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_490_cast_fp16 = einsum(equation = var_490_equation_0, values = (var_274_cast_fp16, var_445_cast_fp16))[name = tensor("op_490_cast_fp16")]; tensor var_492_equation_0 = const()[name = tensor("op_492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_492_cast_fp16 = einsum(equation = var_492_equation_0, values = (var_274_cast_fp16, var_446_cast_fp16))[name = tensor("op_492_cast_fp16")]; tensor var_494_equation_0 = const()[name = tensor("op_494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_494_cast_fp16 = einsum(equation = var_494_equation_0, values = (var_274_cast_fp16, var_447_cast_fp16))[name = tensor("op_494_cast_fp16")]; tensor var_496_equation_0 = const()[name = tensor("op_496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_274_cast_fp16, var_448_cast_fp16))[name = tensor("op_496_cast_fp16")]; tensor var_498_equation_0 = const()[name = tensor("op_498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_278_cast_fp16, var_449_cast_fp16))[name = tensor("op_498_cast_fp16")]; tensor var_500_equation_0 = const()[name = tensor("op_500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_278_cast_fp16, var_450_cast_fp16))[name = tensor("op_500_cast_fp16")]; tensor var_502_equation_0 = const()[name = tensor("op_502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_278_cast_fp16, var_451_cast_fp16))[name = tensor("op_502_cast_fp16")]; tensor var_504_equation_0 = const()[name = tensor("op_504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_278_cast_fp16, var_452_cast_fp16))[name = tensor("op_504_cast_fp16")]; tensor var_506_equation_0 = const()[name = tensor("op_506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_278_cast_fp16, var_453_cast_fp16))[name = tensor("op_506_cast_fp16")]; tensor var_508_equation_0 = const()[name = tensor("op_508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_278_cast_fp16, var_454_cast_fp16))[name = tensor("op_508_cast_fp16")]; tensor var_510_equation_0 = const()[name = tensor("op_510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_282_cast_fp16, var_455_cast_fp16))[name = tensor("op_510_cast_fp16")]; tensor var_512_equation_0 = const()[name = tensor("op_512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_282_cast_fp16, var_456_cast_fp16))[name = tensor("op_512_cast_fp16")]; tensor var_514_equation_0 = const()[name = tensor("op_514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_514_cast_fp16 = einsum(equation = var_514_equation_0, values = (var_282_cast_fp16, var_457_cast_fp16))[name = tensor("op_514_cast_fp16")]; tensor var_516_equation_0 = const()[name = tensor("op_516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_516_cast_fp16 = einsum(equation = var_516_equation_0, values = (var_282_cast_fp16, var_458_cast_fp16))[name = tensor("op_516_cast_fp16")]; tensor var_518_equation_0 = const()[name = tensor("op_518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_518_cast_fp16 = einsum(equation = var_518_equation_0, values = (var_282_cast_fp16, var_459_cast_fp16))[name = tensor("op_518_cast_fp16")]; tensor var_520_equation_0 = const()[name = tensor("op_520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_520_cast_fp16 = einsum(equation = var_520_equation_0, values = (var_282_cast_fp16, var_460_cast_fp16))[name = tensor("op_520_cast_fp16")]; tensor var_522_equation_0 = const()[name = tensor("op_522_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_522_cast_fp16 = einsum(equation = var_522_equation_0, values = (var_286_cast_fp16, var_461_cast_fp16))[name = tensor("op_522_cast_fp16")]; tensor var_524_equation_0 = const()[name = tensor("op_524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_524_cast_fp16 = einsum(equation = var_524_equation_0, values = (var_286_cast_fp16, var_462_cast_fp16))[name = tensor("op_524_cast_fp16")]; tensor var_526_equation_0 = const()[name = tensor("op_526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_526_cast_fp16 = einsum(equation = var_526_equation_0, values = (var_286_cast_fp16, var_463_cast_fp16))[name = tensor("op_526_cast_fp16")]; tensor var_528_equation_0 = const()[name = tensor("op_528_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_528_cast_fp16 = einsum(equation = var_528_equation_0, values = (var_286_cast_fp16, var_464_cast_fp16))[name = tensor("op_528_cast_fp16")]; tensor var_530_equation_0 = const()[name = tensor("op_530_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_530_cast_fp16 = einsum(equation = var_530_equation_0, values = (var_286_cast_fp16, var_465_cast_fp16))[name = tensor("op_530_cast_fp16")]; tensor var_532_equation_0 = const()[name = tensor("op_532_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_532_cast_fp16 = einsum(equation = var_532_equation_0, values = (var_286_cast_fp16, var_466_cast_fp16))[name = tensor("op_532_cast_fp16")]; tensor var_534_equation_0 = const()[name = tensor("op_534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_534_cast_fp16 = einsum(equation = var_534_equation_0, values = (var_290_cast_fp16, var_467_cast_fp16))[name = tensor("op_534_cast_fp16")]; tensor var_536_equation_0 = const()[name = tensor("op_536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_536_cast_fp16 = einsum(equation = var_536_equation_0, values = (var_290_cast_fp16, var_468_cast_fp16))[name = tensor("op_536_cast_fp16")]; tensor var_538_equation_0 = const()[name = tensor("op_538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_538_cast_fp16 = einsum(equation = var_538_equation_0, values = (var_290_cast_fp16, var_469_cast_fp16))[name = tensor("op_538_cast_fp16")]; tensor var_540_equation_0 = const()[name = tensor("op_540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_540_cast_fp16 = einsum(equation = var_540_equation_0, values = (var_290_cast_fp16, var_470_cast_fp16))[name = tensor("op_540_cast_fp16")]; tensor var_542_equation_0 = const()[name = tensor("op_542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_542_cast_fp16 = einsum(equation = var_542_equation_0, values = (var_290_cast_fp16, var_471_cast_fp16))[name = tensor("op_542_cast_fp16")]; tensor var_544_equation_0 = const()[name = tensor("op_544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_544_cast_fp16 = einsum(equation = var_544_equation_0, values = (var_290_cast_fp16, var_472_cast_fp16))[name = tensor("op_544_cast_fp16")]; tensor var_546_interleave_0 = const()[name = tensor("op_546_interleave_0"), val = tensor(false)]; tensor var_546_cast_fp16 = concat(axis = var_121, interleave = var_546_interleave_0, values = (var_474_cast_fp16, var_476_cast_fp16, var_478_cast_fp16, var_480_cast_fp16, var_482_cast_fp16, var_484_cast_fp16))[name = tensor("op_546_cast_fp16")]; tensor var_548_interleave_0 = const()[name = tensor("op_548_interleave_0"), val = tensor(false)]; tensor var_548_cast_fp16 = concat(axis = var_121, interleave = var_548_interleave_0, values = (var_486_cast_fp16, var_488_cast_fp16, var_490_cast_fp16, var_492_cast_fp16, var_494_cast_fp16, var_496_cast_fp16))[name = tensor("op_548_cast_fp16")]; tensor var_550_interleave_0 = const()[name = tensor("op_550_interleave_0"), val = tensor(false)]; tensor var_550_cast_fp16 = concat(axis = var_121, interleave = var_550_interleave_0, values = (var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16))[name = tensor("op_550_cast_fp16")]; tensor var_552_interleave_0 = const()[name = tensor("op_552_interleave_0"), val = tensor(false)]; tensor var_552_cast_fp16 = concat(axis = var_121, interleave = var_552_interleave_0, values = (var_510_cast_fp16, var_512_cast_fp16, var_514_cast_fp16, var_516_cast_fp16, var_518_cast_fp16, var_520_cast_fp16))[name = tensor("op_552_cast_fp16")]; tensor var_554_interleave_0 = const()[name = tensor("op_554_interleave_0"), val = tensor(false)]; tensor var_554_cast_fp16 = concat(axis = var_121, interleave = var_554_interleave_0, values = (var_522_cast_fp16, var_524_cast_fp16, var_526_cast_fp16, var_528_cast_fp16, var_530_cast_fp16, var_532_cast_fp16))[name = tensor("op_554_cast_fp16")]; tensor var_556_interleave_0 = const()[name = tensor("op_556_interleave_0"), val = tensor(false)]; tensor var_556_cast_fp16 = concat(axis = var_121, interleave = var_556_interleave_0, values = (var_534_cast_fp16, var_536_cast_fp16, var_538_cast_fp16, var_540_cast_fp16, var_542_cast_fp16, var_544_cast_fp16))[name = tensor("op_556_cast_fp16")]; tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; tensor input_1_cast_fp16 = concat(axis = var_133, interleave = input_1_interleave_0, values = (var_546_cast_fp16, var_548_cast_fp16, var_550_cast_fp16, var_552_cast_fp16, var_554_cast_fp16, var_556_cast_fp16))[name = tensor("input_1_cast_fp16")]; tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("valid")]; tensor obj_3_strides_0 = const()[name = tensor("obj_3_strides_0"), val = tensor([1, 1])]; tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_3_dilations_0 = const()[name = tensor("obj_3_dilations_0"), val = tensor([1, 1])]; tensor obj_3_groups_0 = const()[name = tensor("obj_3_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3112896)))]; tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3407872)))]; tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = obj_3_dilations_0, groups = obj_3_groups_0, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = obj_3_strides_0, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; tensor var_575_to_fp16 = const()[name = tensor("op_575_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_575_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3408704)))]; tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3409536)))]; tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("valid")]; tensor input_5_strides_0 = const()[name = tensor("input_5_strides_0"), val = tensor([1, 1])]; tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_5_dilations_0 = const()[name = tensor("input_5_dilations_0"), val = tensor([1, 1])]; tensor input_5_groups_0 = const()[name = tensor("input_5_groups_0"), val = tensor(1)]; tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3410368)))]; tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4590080)))]; tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("valid")]; tensor hidden_states_5_strides_0 = const()[name = tensor("hidden_states_5_strides_0"), val = tensor([1, 1])]; tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_5_dilations_0 = const()[name = tensor("hidden_states_5_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_5_groups_0 = const()[name = tensor("hidden_states_5_groups_0"), val = tensor(1)]; tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4593216)))]; tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5772928)))]; tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = hidden_states_5_dilations_0, groups = hidden_states_5_groups_0, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = hidden_states_5_strides_0, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; tensor var_607 = const()[name = tensor("op_607"), val = tensor(3)]; tensor var_619 = const()[name = tensor("op_619"), val = tensor(1)]; tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; tensor var_636_to_fp16 = const()[name = tensor("op_636_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_636_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5773760)))]; tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5774592)))]; tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("valid")]; tensor query_3_strides_0 = const()[name = tensor("query_3_strides_0"), val = tensor([1, 1])]; tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_3_dilations_0 = const()[name = tensor("query_3_dilations_0"), val = tensor([1, 1])]; tensor query_3_groups_0 = const()[name = tensor("query_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5775424)))]; tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6070400)))]; tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = query_3_dilations_0, groups = query_3_groups_0, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = query_3_strides_0, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("valid")]; tensor key_3_strides_0 = const()[name = tensor("key_3_strides_0"), val = tensor([1, 1])]; tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_3_dilations_0 = const()[name = tensor("key_3_dilations_0"), val = tensor([1, 1])]; tensor key_3_groups_0 = const()[name = tensor("key_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6071232)))]; tensor key_3_cast_fp16 = conv(dilations = key_3_dilations_0, groups = key_3_groups_0, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = key_3_strides_0, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("valid")]; tensor value_3_strides_0 = const()[name = tensor("value_3_strides_0"), val = tensor([1, 1])]; tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_3_dilations_0 = const()[name = tensor("value_3_dilations_0"), val = tensor([1, 1])]; tensor value_3_groups_0 = const()[name = tensor("value_3_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6366208)))]; tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6661184)))]; tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = value_3_dilations_0, groups = value_3_groups_0, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = value_3_strides_0, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; tensor var_671_begin_0 = const()[name = tensor("op_671_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_671_end_0 = const()[name = tensor("op_671_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_671_end_mask_0 = const()[name = tensor("op_671_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_671_cast_fp16")]; tensor var_675_begin_0 = const()[name = tensor("op_675_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_675_end_0 = const()[name = tensor("op_675_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_675_end_mask_0 = const()[name = tensor("op_675_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_675_cast_fp16 = slice_by_index(begin = var_675_begin_0, end = var_675_end_0, end_mask = var_675_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_675_cast_fp16")]; tensor var_679_begin_0 = const()[name = tensor("op_679_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_679_end_0 = const()[name = tensor("op_679_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_679_end_mask_0 = const()[name = tensor("op_679_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_679_cast_fp16 = slice_by_index(begin = var_679_begin_0, end = var_679_end_0, end_mask = var_679_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_679_cast_fp16")]; tensor var_683_begin_0 = const()[name = tensor("op_683_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_683_end_0 = const()[name = tensor("op_683_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_683_end_mask_0 = const()[name = tensor("op_683_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_683_cast_fp16 = slice_by_index(begin = var_683_begin_0, end = var_683_end_0, end_mask = var_683_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_683_cast_fp16")]; tensor var_687_begin_0 = const()[name = tensor("op_687_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_687_end_0 = const()[name = tensor("op_687_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_687_end_mask_0 = const()[name = tensor("op_687_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_687_cast_fp16 = slice_by_index(begin = var_687_begin_0, end = var_687_end_0, end_mask = var_687_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_687_cast_fp16")]; tensor var_691_begin_0 = const()[name = tensor("op_691_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_691_end_0 = const()[name = tensor("op_691_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_691_end_mask_0 = const()[name = tensor("op_691_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_691_cast_fp16 = slice_by_index(begin = var_691_begin_0, end = var_691_end_0, end_mask = var_691_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_691_cast_fp16")]; tensor var_694_begin_0 = const()[name = tensor("op_694_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_694_end_0 = const()[name = tensor("op_694_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_694_end_mask_0 = const()[name = tensor("op_694_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_694_cast_fp16 = slice_by_index(begin = var_694_begin_0, end = var_694_end_0, end_mask = var_694_end_mask_0, x = var_671_cast_fp16)[name = tensor("op_694_cast_fp16")]; tensor var_695_begin_0 = const()[name = tensor("op_695_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_695_end_0 = const()[name = tensor("op_695_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_695_end_mask_0 = const()[name = tensor("op_695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_695_cast_fp16 = slice_by_index(begin = var_695_begin_0, end = var_695_end_0, end_mask = var_695_end_mask_0, x = var_671_cast_fp16)[name = tensor("op_695_cast_fp16")]; tensor var_696_begin_0 = const()[name = tensor("op_696_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_696_end_0 = const()[name = tensor("op_696_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_696_end_mask_0 = const()[name = tensor("op_696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_696_cast_fp16 = slice_by_index(begin = var_696_begin_0, end = var_696_end_0, end_mask = var_696_end_mask_0, x = var_671_cast_fp16)[name = tensor("op_696_cast_fp16")]; tensor var_697_begin_0 = const()[name = tensor("op_697_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_697_end_0 = const()[name = tensor("op_697_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_697_end_mask_0 = const()[name = tensor("op_697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_697_cast_fp16 = slice_by_index(begin = var_697_begin_0, end = var_697_end_0, end_mask = var_697_end_mask_0, x = var_671_cast_fp16)[name = tensor("op_697_cast_fp16")]; tensor var_698_begin_0 = const()[name = tensor("op_698_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_698_end_0 = const()[name = tensor("op_698_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_698_end_mask_0 = const()[name = tensor("op_698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_698_cast_fp16 = slice_by_index(begin = var_698_begin_0, end = var_698_end_0, end_mask = var_698_end_mask_0, x = var_671_cast_fp16)[name = tensor("op_698_cast_fp16")]; tensor var_699_begin_0 = const()[name = tensor("op_699_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_699_end_0 = const()[name = tensor("op_699_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_699_end_mask_0 = const()[name = tensor("op_699_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_699_cast_fp16 = slice_by_index(begin = var_699_begin_0, end = var_699_end_0, end_mask = var_699_end_mask_0, x = var_671_cast_fp16)[name = tensor("op_699_cast_fp16")]; tensor var_700_begin_0 = const()[name = tensor("op_700_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_700_end_0 = const()[name = tensor("op_700_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_700_end_mask_0 = const()[name = tensor("op_700_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_700_cast_fp16 = slice_by_index(begin = var_700_begin_0, end = var_700_end_0, end_mask = var_700_end_mask_0, x = var_675_cast_fp16)[name = tensor("op_700_cast_fp16")]; tensor var_701_begin_0 = const()[name = tensor("op_701_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_701_end_0 = const()[name = tensor("op_701_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_701_end_mask_0 = const()[name = tensor("op_701_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_701_cast_fp16 = slice_by_index(begin = var_701_begin_0, end = var_701_end_0, end_mask = var_701_end_mask_0, x = var_675_cast_fp16)[name = tensor("op_701_cast_fp16")]; tensor var_702_begin_0 = const()[name = tensor("op_702_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_702_end_0 = const()[name = tensor("op_702_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_702_end_mask_0 = const()[name = tensor("op_702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_702_cast_fp16 = slice_by_index(begin = var_702_begin_0, end = var_702_end_0, end_mask = var_702_end_mask_0, x = var_675_cast_fp16)[name = tensor("op_702_cast_fp16")]; tensor var_703_begin_0 = const()[name = tensor("op_703_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_703_end_0 = const()[name = tensor("op_703_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_703_end_mask_0 = const()[name = tensor("op_703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_703_cast_fp16 = slice_by_index(begin = var_703_begin_0, end = var_703_end_0, end_mask = var_703_end_mask_0, x = var_675_cast_fp16)[name = tensor("op_703_cast_fp16")]; tensor var_704_begin_0 = const()[name = tensor("op_704_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_704_end_0 = const()[name = tensor("op_704_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_704_end_mask_0 = const()[name = tensor("op_704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_704_cast_fp16 = slice_by_index(begin = var_704_begin_0, end = var_704_end_0, end_mask = var_704_end_mask_0, x = var_675_cast_fp16)[name = tensor("op_704_cast_fp16")]; tensor var_705_begin_0 = const()[name = tensor("op_705_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_705_end_0 = const()[name = tensor("op_705_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_705_end_mask_0 = const()[name = tensor("op_705_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_705_cast_fp16 = slice_by_index(begin = var_705_begin_0, end = var_705_end_0, end_mask = var_705_end_mask_0, x = var_675_cast_fp16)[name = tensor("op_705_cast_fp16")]; tensor var_706_begin_0 = const()[name = tensor("op_706_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_706_end_0 = const()[name = tensor("op_706_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_706_end_mask_0 = const()[name = tensor("op_706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_706_cast_fp16 = slice_by_index(begin = var_706_begin_0, end = var_706_end_0, end_mask = var_706_end_mask_0, x = var_679_cast_fp16)[name = tensor("op_706_cast_fp16")]; tensor var_707_begin_0 = const()[name = tensor("op_707_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_707_end_0 = const()[name = tensor("op_707_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_707_end_mask_0 = const()[name = tensor("op_707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_707_cast_fp16 = slice_by_index(begin = var_707_begin_0, end = var_707_end_0, end_mask = var_707_end_mask_0, x = var_679_cast_fp16)[name = tensor("op_707_cast_fp16")]; tensor var_708_begin_0 = const()[name = tensor("op_708_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_708_end_0 = const()[name = tensor("op_708_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_708_end_mask_0 = const()[name = tensor("op_708_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_708_cast_fp16 = slice_by_index(begin = var_708_begin_0, end = var_708_end_0, end_mask = var_708_end_mask_0, x = var_679_cast_fp16)[name = tensor("op_708_cast_fp16")]; tensor var_709_begin_0 = const()[name = tensor("op_709_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_709_end_0 = const()[name = tensor("op_709_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_709_end_mask_0 = const()[name = tensor("op_709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_709_cast_fp16 = slice_by_index(begin = var_709_begin_0, end = var_709_end_0, end_mask = var_709_end_mask_0, x = var_679_cast_fp16)[name = tensor("op_709_cast_fp16")]; tensor var_710_begin_0 = const()[name = tensor("op_710_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_710_end_0 = const()[name = tensor("op_710_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_710_end_mask_0 = const()[name = tensor("op_710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_710_cast_fp16 = slice_by_index(begin = var_710_begin_0, end = var_710_end_0, end_mask = var_710_end_mask_0, x = var_679_cast_fp16)[name = tensor("op_710_cast_fp16")]; tensor var_711_begin_0 = const()[name = tensor("op_711_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_711_end_0 = const()[name = tensor("op_711_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_711_end_mask_0 = const()[name = tensor("op_711_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_711_cast_fp16 = slice_by_index(begin = var_711_begin_0, end = var_711_end_0, end_mask = var_711_end_mask_0, x = var_679_cast_fp16)[name = tensor("op_711_cast_fp16")]; tensor var_712_begin_0 = const()[name = tensor("op_712_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_712_end_0 = const()[name = tensor("op_712_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_712_end_mask_0 = const()[name = tensor("op_712_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_712_cast_fp16 = slice_by_index(begin = var_712_begin_0, end = var_712_end_0, end_mask = var_712_end_mask_0, x = var_683_cast_fp16)[name = tensor("op_712_cast_fp16")]; tensor var_713_begin_0 = const()[name = tensor("op_713_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_713_end_0 = const()[name = tensor("op_713_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_713_end_mask_0 = const()[name = tensor("op_713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_713_cast_fp16 = slice_by_index(begin = var_713_begin_0, end = var_713_end_0, end_mask = var_713_end_mask_0, x = var_683_cast_fp16)[name = tensor("op_713_cast_fp16")]; tensor var_714_begin_0 = const()[name = tensor("op_714_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_714_end_0 = const()[name = tensor("op_714_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_714_end_mask_0 = const()[name = tensor("op_714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_714_cast_fp16 = slice_by_index(begin = var_714_begin_0, end = var_714_end_0, end_mask = var_714_end_mask_0, x = var_683_cast_fp16)[name = tensor("op_714_cast_fp16")]; tensor var_715_begin_0 = const()[name = tensor("op_715_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_715_end_0 = const()[name = tensor("op_715_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_715_end_mask_0 = const()[name = tensor("op_715_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_715_cast_fp16 = slice_by_index(begin = var_715_begin_0, end = var_715_end_0, end_mask = var_715_end_mask_0, x = var_683_cast_fp16)[name = tensor("op_715_cast_fp16")]; tensor var_716_begin_0 = const()[name = tensor("op_716_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_716_end_0 = const()[name = tensor("op_716_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_716_end_mask_0 = const()[name = tensor("op_716_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_716_cast_fp16 = slice_by_index(begin = var_716_begin_0, end = var_716_end_0, end_mask = var_716_end_mask_0, x = var_683_cast_fp16)[name = tensor("op_716_cast_fp16")]; tensor var_717_begin_0 = const()[name = tensor("op_717_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_717_end_0 = const()[name = tensor("op_717_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_717_end_mask_0 = const()[name = tensor("op_717_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_717_cast_fp16 = slice_by_index(begin = var_717_begin_0, end = var_717_end_0, end_mask = var_717_end_mask_0, x = var_683_cast_fp16)[name = tensor("op_717_cast_fp16")]; tensor var_718_begin_0 = const()[name = tensor("op_718_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_718_end_0 = const()[name = tensor("op_718_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_718_end_mask_0 = const()[name = tensor("op_718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_718_cast_fp16 = slice_by_index(begin = var_718_begin_0, end = var_718_end_0, end_mask = var_718_end_mask_0, x = var_687_cast_fp16)[name = tensor("op_718_cast_fp16")]; tensor var_719_begin_0 = const()[name = tensor("op_719_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_719_end_0 = const()[name = tensor("op_719_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_719_end_mask_0 = const()[name = tensor("op_719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_719_cast_fp16 = slice_by_index(begin = var_719_begin_0, end = var_719_end_0, end_mask = var_719_end_mask_0, x = var_687_cast_fp16)[name = tensor("op_719_cast_fp16")]; tensor var_720_begin_0 = const()[name = tensor("op_720_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_720_end_0 = const()[name = tensor("op_720_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_720_end_mask_0 = const()[name = tensor("op_720_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_720_cast_fp16 = slice_by_index(begin = var_720_begin_0, end = var_720_end_0, end_mask = var_720_end_mask_0, x = var_687_cast_fp16)[name = tensor("op_720_cast_fp16")]; tensor var_721_begin_0 = const()[name = tensor("op_721_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_721_end_0 = const()[name = tensor("op_721_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_721_end_mask_0 = const()[name = tensor("op_721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_721_cast_fp16 = slice_by_index(begin = var_721_begin_0, end = var_721_end_0, end_mask = var_721_end_mask_0, x = var_687_cast_fp16)[name = tensor("op_721_cast_fp16")]; tensor var_722_begin_0 = const()[name = tensor("op_722_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_722_end_0 = const()[name = tensor("op_722_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_722_end_mask_0 = const()[name = tensor("op_722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_722_cast_fp16 = slice_by_index(begin = var_722_begin_0, end = var_722_end_0, end_mask = var_722_end_mask_0, x = var_687_cast_fp16)[name = tensor("op_722_cast_fp16")]; tensor var_723_begin_0 = const()[name = tensor("op_723_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_723_end_0 = const()[name = tensor("op_723_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_723_end_mask_0 = const()[name = tensor("op_723_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_723_cast_fp16 = slice_by_index(begin = var_723_begin_0, end = var_723_end_0, end_mask = var_723_end_mask_0, x = var_687_cast_fp16)[name = tensor("op_723_cast_fp16")]; tensor var_724_begin_0 = const()[name = tensor("op_724_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_724_end_0 = const()[name = tensor("op_724_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_724_end_mask_0 = const()[name = tensor("op_724_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_724_cast_fp16 = slice_by_index(begin = var_724_begin_0, end = var_724_end_0, end_mask = var_724_end_mask_0, x = var_691_cast_fp16)[name = tensor("op_724_cast_fp16")]; tensor var_725_begin_0 = const()[name = tensor("op_725_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_725_end_0 = const()[name = tensor("op_725_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_725_end_mask_0 = const()[name = tensor("op_725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_725_cast_fp16 = slice_by_index(begin = var_725_begin_0, end = var_725_end_0, end_mask = var_725_end_mask_0, x = var_691_cast_fp16)[name = tensor("op_725_cast_fp16")]; tensor var_726_begin_0 = const()[name = tensor("op_726_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_726_end_0 = const()[name = tensor("op_726_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_726_end_mask_0 = const()[name = tensor("op_726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_726_cast_fp16 = slice_by_index(begin = var_726_begin_0, end = var_726_end_0, end_mask = var_726_end_mask_0, x = var_691_cast_fp16)[name = tensor("op_726_cast_fp16")]; tensor var_727_begin_0 = const()[name = tensor("op_727_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_727_end_0 = const()[name = tensor("op_727_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_727_end_mask_0 = const()[name = tensor("op_727_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_727_cast_fp16 = slice_by_index(begin = var_727_begin_0, end = var_727_end_0, end_mask = var_727_end_mask_0, x = var_691_cast_fp16)[name = tensor("op_727_cast_fp16")]; tensor var_728_begin_0 = const()[name = tensor("op_728_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_728_end_0 = const()[name = tensor("op_728_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_728_end_mask_0 = const()[name = tensor("op_728_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_728_cast_fp16 = slice_by_index(begin = var_728_begin_0, end = var_728_end_0, end_mask = var_728_end_mask_0, x = var_691_cast_fp16)[name = tensor("op_728_cast_fp16")]; tensor var_729_begin_0 = const()[name = tensor("op_729_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_729_end_0 = const()[name = tensor("op_729_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_729_end_mask_0 = const()[name = tensor("op_729_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_729_cast_fp16 = slice_by_index(begin = var_729_begin_0, end = var_729_end_0, end_mask = var_729_end_mask_0, x = var_691_cast_fp16)[name = tensor("op_729_cast_fp16")]; tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_734_begin_0 = const()[name = tensor("op_734_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_734_end_0 = const()[name = tensor("op_734_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_734_end_mask_0 = const()[name = tensor("op_734_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_2")]; tensor var_734_cast_fp16 = slice_by_index(begin = var_734_begin_0, end = var_734_end_0, end_mask = var_734_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_734_cast_fp16")]; tensor var_738_begin_0 = const()[name = tensor("op_738_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_738_end_0 = const()[name = tensor("op_738_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_738_end_mask_0 = const()[name = tensor("op_738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_738_cast_fp16 = slice_by_index(begin = var_738_begin_0, end = var_738_end_0, end_mask = var_738_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_738_cast_fp16")]; tensor var_742_begin_0 = const()[name = tensor("op_742_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_742_end_0 = const()[name = tensor("op_742_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_742_end_mask_0 = const()[name = tensor("op_742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_742_cast_fp16 = slice_by_index(begin = var_742_begin_0, end = var_742_end_0, end_mask = var_742_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_742_cast_fp16")]; tensor var_746_begin_0 = const()[name = tensor("op_746_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_746_end_0 = const()[name = tensor("op_746_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_746_end_mask_0 = const()[name = tensor("op_746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_746_cast_fp16")]; tensor var_750_begin_0 = const()[name = tensor("op_750_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_750_end_0 = const()[name = tensor("op_750_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_750_end_mask_0 = const()[name = tensor("op_750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_750_cast_fp16 = slice_by_index(begin = var_750_begin_0, end = var_750_end_0, end_mask = var_750_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_750_cast_fp16")]; tensor var_754_begin_0 = const()[name = tensor("op_754_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_754_end_0 = const()[name = tensor("op_754_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_754_end_mask_0 = const()[name = tensor("op_754_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_754_cast_fp16 = slice_by_index(begin = var_754_begin_0, end = var_754_end_0, end_mask = var_754_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_754_cast_fp16")]; tensor var_756_begin_0 = const()[name = tensor("op_756_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_756_end_0 = const()[name = tensor("op_756_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_756_end_mask_0 = const()[name = tensor("op_756_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_756_cast_fp16 = slice_by_index(begin = var_756_begin_0, end = var_756_end_0, end_mask = var_756_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_756_cast_fp16")]; tensor var_760_begin_0 = const()[name = tensor("op_760_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_760_end_0 = const()[name = tensor("op_760_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_760_end_mask_0 = const()[name = tensor("op_760_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_760_cast_fp16 = slice_by_index(begin = var_760_begin_0, end = var_760_end_0, end_mask = var_760_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_760_cast_fp16")]; tensor var_764_begin_0 = const()[name = tensor("op_764_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_764_end_0 = const()[name = tensor("op_764_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_764_end_mask_0 = const()[name = tensor("op_764_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_764_cast_fp16 = slice_by_index(begin = var_764_begin_0, end = var_764_end_0, end_mask = var_764_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_764_cast_fp16")]; tensor var_768_begin_0 = const()[name = tensor("op_768_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_768_end_0 = const()[name = tensor("op_768_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_768_end_mask_0 = const()[name = tensor("op_768_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_768_cast_fp16 = slice_by_index(begin = var_768_begin_0, end = var_768_end_0, end_mask = var_768_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_768_cast_fp16")]; tensor var_772_begin_0 = const()[name = tensor("op_772_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_772_end_0 = const()[name = tensor("op_772_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_772_end_mask_0 = const()[name = tensor("op_772_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_772_cast_fp16 = slice_by_index(begin = var_772_begin_0, end = var_772_end_0, end_mask = var_772_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_772_cast_fp16")]; tensor var_776_begin_0 = const()[name = tensor("op_776_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_776_end_0 = const()[name = tensor("op_776_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_776_end_mask_0 = const()[name = tensor("op_776_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_776_cast_fp16 = slice_by_index(begin = var_776_begin_0, end = var_776_end_0, end_mask = var_776_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_776_cast_fp16")]; tensor _SplitHeadsQ__mh_w_73_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_73_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_73_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_73_equation_0, values = (var_734_cast_fp16, var_694_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_73_cast_fp16")]; tensor _SplitHeadsQ__mh_w_75_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_75_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_75_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_75_equation_0, values = (var_734_cast_fp16, var_695_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_75_cast_fp16")]; tensor _SplitHeadsQ__mh_w_77_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_77_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_77_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_77_equation_0, values = (var_734_cast_fp16, var_696_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_77_cast_fp16")]; tensor _SplitHeadsQ__mh_w_79_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_79_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_79_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_79_equation_0, values = (var_734_cast_fp16, var_697_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_79_cast_fp16")]; tensor _SplitHeadsQ__mh_w_81_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_81_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_81_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_81_equation_0, values = (var_734_cast_fp16, var_698_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_81_cast_fp16")]; tensor _SplitHeadsQ__mh_w_83_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_83_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_83_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_83_equation_0, values = (var_734_cast_fp16, var_699_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_83_cast_fp16")]; tensor _SplitHeadsQ__mh_w_85_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_85_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_85_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_85_equation_0, values = (var_738_cast_fp16, var_700_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_85_cast_fp16")]; tensor _SplitHeadsQ__mh_w_87_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_87_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_87_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_87_equation_0, values = (var_738_cast_fp16, var_701_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_87_cast_fp16")]; tensor _SplitHeadsQ__mh_w_89_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_89_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_89_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_89_equation_0, values = (var_738_cast_fp16, var_702_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_89_cast_fp16")]; tensor _SplitHeadsQ__mh_w_91_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_91_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_91_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_91_equation_0, values = (var_738_cast_fp16, var_703_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_91_cast_fp16")]; tensor _SplitHeadsQ__mh_w_93_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_93_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_93_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_93_equation_0, values = (var_738_cast_fp16, var_704_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_93_cast_fp16")]; tensor _SplitHeadsQ__mh_w_95_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_95_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_95_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_95_equation_0, values = (var_738_cast_fp16, var_705_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_95_cast_fp16")]; tensor _SplitHeadsQ__mh_w_97_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_97_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_97_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_97_equation_0, values = (var_742_cast_fp16, var_706_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_97_cast_fp16")]; tensor _SplitHeadsQ__mh_w_99_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_99_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_99_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_99_equation_0, values = (var_742_cast_fp16, var_707_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_99_cast_fp16")]; tensor _SplitHeadsQ__mh_w_101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_101_equation_0, values = (var_742_cast_fp16, var_708_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_103_equation_0, values = (var_742_cast_fp16, var_709_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_105_equation_0, values = (var_742_cast_fp16, var_710_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_107_equation_0, values = (var_742_cast_fp16, var_711_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_109_equation_0, values = (var_746_cast_fp16, var_712_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_111_equation_0, values = (var_746_cast_fp16, var_713_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_113_equation_0, values = (var_746_cast_fp16, var_714_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_115_equation_0, values = (var_746_cast_fp16, var_715_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_117_equation_0, values = (var_746_cast_fp16, var_716_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_119_equation_0, values = (var_746_cast_fp16, var_717_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_121_equation_0, values = (var_750_cast_fp16, var_718_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_123_equation_0, values = (var_750_cast_fp16, var_719_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_125_equation_0, values = (var_750_cast_fp16, var_720_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_127_equation_0, values = (var_750_cast_fp16, var_721_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_129_equation_0, values = (var_750_cast_fp16, var_722_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_131_equation_0, values = (var_750_cast_fp16, var_723_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_133_equation_0, values = (var_754_cast_fp16, var_724_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_135_equation_0, values = (var_754_cast_fp16, var_725_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_137_equation_0, values = (var_754_cast_fp16, var_726_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_139_equation_0, values = (var_754_cast_fp16, var_727_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_141_equation_0, values = (var_754_cast_fp16, var_728_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_143_equation_0, values = (var_754_cast_fp16, var_729_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_143_cast_fp16")]; tensor var_851_to_fp16 = const()[name = tensor("op_851_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_73_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_73_cast_fp16, y = var_851_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; tensor var_853_to_fp16 = const()[name = tensor("op_853_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_75_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_75_cast_fp16, y = var_853_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; tensor var_855_to_fp16 = const()[name = tensor("op_855_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_77_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_77_cast_fp16, y = var_855_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; tensor var_857_to_fp16 = const()[name = tensor("op_857_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_79_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_79_cast_fp16, y = var_857_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; tensor var_859_to_fp16 = const()[name = tensor("op_859_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_81_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_81_cast_fp16, y = var_859_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; tensor var_861_to_fp16 = const()[name = tensor("op_861_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_83_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_83_cast_fp16, y = var_861_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; tensor var_863_to_fp16 = const()[name = tensor("op_863_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_85_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_85_cast_fp16, y = var_863_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; tensor var_865_to_fp16 = const()[name = tensor("op_865_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_87_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_87_cast_fp16, y = var_865_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; tensor var_867_to_fp16 = const()[name = tensor("op_867_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_89_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_89_cast_fp16, y = var_867_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; tensor var_869_to_fp16 = const()[name = tensor("op_869_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_91_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_91_cast_fp16, y = var_869_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; tensor var_871_to_fp16 = const()[name = tensor("op_871_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_93_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_93_cast_fp16, y = var_871_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; tensor var_873_to_fp16 = const()[name = tensor("op_873_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_95_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_95_cast_fp16, y = var_873_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; tensor var_875_to_fp16 = const()[name = tensor("op_875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_97_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_97_cast_fp16, y = var_875_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; tensor var_877_to_fp16 = const()[name = tensor("op_877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_99_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_99_cast_fp16, y = var_877_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; tensor var_879_to_fp16 = const()[name = tensor("op_879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_101_cast_fp16, y = var_879_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; tensor var_881_to_fp16 = const()[name = tensor("op_881_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_103_cast_fp16, y = var_881_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; tensor var_883_to_fp16 = const()[name = tensor("op_883_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_105_cast_fp16, y = var_883_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; tensor var_885_to_fp16 = const()[name = tensor("op_885_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_107_cast_fp16, y = var_885_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; tensor var_887_to_fp16 = const()[name = tensor("op_887_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_109_cast_fp16, y = var_887_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; tensor var_889_to_fp16 = const()[name = tensor("op_889_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_111_cast_fp16, y = var_889_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; tensor var_891_to_fp16 = const()[name = tensor("op_891_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_113_cast_fp16, y = var_891_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; tensor var_893_to_fp16 = const()[name = tensor("op_893_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_115_cast_fp16, y = var_893_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; tensor var_895_to_fp16 = const()[name = tensor("op_895_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_117_cast_fp16, y = var_895_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; tensor var_897_to_fp16 = const()[name = tensor("op_897_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_119_cast_fp16, y = var_897_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; tensor var_899_to_fp16 = const()[name = tensor("op_899_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_121_cast_fp16, y = var_899_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; tensor var_901_to_fp16 = const()[name = tensor("op_901_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_123_cast_fp16, y = var_901_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; tensor var_903_to_fp16 = const()[name = tensor("op_903_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_125_cast_fp16, y = var_903_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; tensor var_905_to_fp16 = const()[name = tensor("op_905_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_127_cast_fp16, y = var_905_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; tensor var_907_to_fp16 = const()[name = tensor("op_907_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_129_cast_fp16, y = var_907_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; tensor var_909_to_fp16 = const()[name = tensor("op_909_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_131_cast_fp16, y = var_909_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; tensor var_911_to_fp16 = const()[name = tensor("op_911_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_133_cast_fp16, y = var_911_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; tensor var_913_to_fp16 = const()[name = tensor("op_913_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_135_cast_fp16, y = var_913_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; tensor var_915_to_fp16 = const()[name = tensor("op_915_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_137_cast_fp16, y = var_915_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; tensor var_917_to_fp16 = const()[name = tensor("op_917_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_139_cast_fp16, y = var_917_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; tensor var_919_to_fp16 = const()[name = tensor("op_919_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_141_cast_fp16, y = var_919_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; tensor var_921_to_fp16 = const()[name = tensor("op_921_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_143_cast_fp16, y = var_921_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; tensor var_923_cast_fp16 = softmax(axis = var_619, x = aw_chunk_73_cast_fp16)[name = tensor("op_923_cast_fp16")]; tensor var_924_cast_fp16 = softmax(axis = var_619, x = aw_chunk_75_cast_fp16)[name = tensor("op_924_cast_fp16")]; tensor var_925_cast_fp16 = softmax(axis = var_619, x = aw_chunk_77_cast_fp16)[name = tensor("op_925_cast_fp16")]; tensor var_926_cast_fp16 = softmax(axis = var_619, x = aw_chunk_79_cast_fp16)[name = tensor("op_926_cast_fp16")]; tensor var_927_cast_fp16 = softmax(axis = var_619, x = aw_chunk_81_cast_fp16)[name = tensor("op_927_cast_fp16")]; tensor var_928_cast_fp16 = softmax(axis = var_619, x = aw_chunk_83_cast_fp16)[name = tensor("op_928_cast_fp16")]; tensor var_929_cast_fp16 = softmax(axis = var_619, x = aw_chunk_85_cast_fp16)[name = tensor("op_929_cast_fp16")]; tensor var_930_cast_fp16 = softmax(axis = var_619, x = aw_chunk_87_cast_fp16)[name = tensor("op_930_cast_fp16")]; tensor var_931_cast_fp16 = softmax(axis = var_619, x = aw_chunk_89_cast_fp16)[name = tensor("op_931_cast_fp16")]; tensor var_932_cast_fp16 = softmax(axis = var_619, x = aw_chunk_91_cast_fp16)[name = tensor("op_932_cast_fp16")]; tensor var_933_cast_fp16 = softmax(axis = var_619, x = aw_chunk_93_cast_fp16)[name = tensor("op_933_cast_fp16")]; tensor var_934_cast_fp16 = softmax(axis = var_619, x = aw_chunk_95_cast_fp16)[name = tensor("op_934_cast_fp16")]; tensor var_935_cast_fp16 = softmax(axis = var_619, x = aw_chunk_97_cast_fp16)[name = tensor("op_935_cast_fp16")]; tensor var_936_cast_fp16 = softmax(axis = var_619, x = aw_chunk_99_cast_fp16)[name = tensor("op_936_cast_fp16")]; tensor var_937_cast_fp16 = softmax(axis = var_619, x = aw_chunk_101_cast_fp16)[name = tensor("op_937_cast_fp16")]; tensor var_938_cast_fp16 = softmax(axis = var_619, x = aw_chunk_103_cast_fp16)[name = tensor("op_938_cast_fp16")]; tensor var_939_cast_fp16 = softmax(axis = var_619, x = aw_chunk_105_cast_fp16)[name = tensor("op_939_cast_fp16")]; tensor var_940_cast_fp16 = softmax(axis = var_619, x = aw_chunk_107_cast_fp16)[name = tensor("op_940_cast_fp16")]; tensor var_941_cast_fp16 = softmax(axis = var_619, x = aw_chunk_109_cast_fp16)[name = tensor("op_941_cast_fp16")]; tensor var_942_cast_fp16 = softmax(axis = var_619, x = aw_chunk_111_cast_fp16)[name = tensor("op_942_cast_fp16")]; tensor var_943_cast_fp16 = softmax(axis = var_619, x = aw_chunk_113_cast_fp16)[name = tensor("op_943_cast_fp16")]; tensor var_944_cast_fp16 = softmax(axis = var_619, x = aw_chunk_115_cast_fp16)[name = tensor("op_944_cast_fp16")]; tensor var_945_cast_fp16 = softmax(axis = var_619, x = aw_chunk_117_cast_fp16)[name = tensor("op_945_cast_fp16")]; tensor var_946_cast_fp16 = softmax(axis = var_619, x = aw_chunk_119_cast_fp16)[name = tensor("op_946_cast_fp16")]; tensor var_947_cast_fp16 = softmax(axis = var_619, x = aw_chunk_121_cast_fp16)[name = tensor("op_947_cast_fp16")]; tensor var_948_cast_fp16 = softmax(axis = var_619, x = aw_chunk_123_cast_fp16)[name = tensor("op_948_cast_fp16")]; tensor var_949_cast_fp16 = softmax(axis = var_619, x = aw_chunk_125_cast_fp16)[name = tensor("op_949_cast_fp16")]; tensor var_950_cast_fp16 = softmax(axis = var_619, x = aw_chunk_127_cast_fp16)[name = tensor("op_950_cast_fp16")]; tensor var_951_cast_fp16 = softmax(axis = var_619, x = aw_chunk_129_cast_fp16)[name = tensor("op_951_cast_fp16")]; tensor var_952_cast_fp16 = softmax(axis = var_619, x = aw_chunk_131_cast_fp16)[name = tensor("op_952_cast_fp16")]; tensor var_953_cast_fp16 = softmax(axis = var_619, x = aw_chunk_133_cast_fp16)[name = tensor("op_953_cast_fp16")]; tensor var_954_cast_fp16 = softmax(axis = var_619, x = aw_chunk_135_cast_fp16)[name = tensor("op_954_cast_fp16")]; tensor var_955_cast_fp16 = softmax(axis = var_619, x = aw_chunk_137_cast_fp16)[name = tensor("op_955_cast_fp16")]; tensor var_956_cast_fp16 = softmax(axis = var_619, x = aw_chunk_139_cast_fp16)[name = tensor("op_956_cast_fp16")]; tensor var_957_cast_fp16 = softmax(axis = var_619, x = aw_chunk_141_cast_fp16)[name = tensor("op_957_cast_fp16")]; tensor var_958_cast_fp16 = softmax(axis = var_619, x = aw_chunk_143_cast_fp16)[name = tensor("op_958_cast_fp16")]; tensor var_960_equation_0 = const()[name = tensor("op_960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_960_cast_fp16 = einsum(equation = var_960_equation_0, values = (var_756_cast_fp16, var_923_cast_fp16))[name = tensor("op_960_cast_fp16")]; tensor var_962_equation_0 = const()[name = tensor("op_962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_962_cast_fp16 = einsum(equation = var_962_equation_0, values = (var_756_cast_fp16, var_924_cast_fp16))[name = tensor("op_962_cast_fp16")]; tensor var_964_equation_0 = const()[name = tensor("op_964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_964_cast_fp16 = einsum(equation = var_964_equation_0, values = (var_756_cast_fp16, var_925_cast_fp16))[name = tensor("op_964_cast_fp16")]; tensor var_966_equation_0 = const()[name = tensor("op_966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_966_cast_fp16 = einsum(equation = var_966_equation_0, values = (var_756_cast_fp16, var_926_cast_fp16))[name = tensor("op_966_cast_fp16")]; tensor var_968_equation_0 = const()[name = tensor("op_968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_968_cast_fp16 = einsum(equation = var_968_equation_0, values = (var_756_cast_fp16, var_927_cast_fp16))[name = tensor("op_968_cast_fp16")]; tensor var_970_equation_0 = const()[name = tensor("op_970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_970_cast_fp16 = einsum(equation = var_970_equation_0, values = (var_756_cast_fp16, var_928_cast_fp16))[name = tensor("op_970_cast_fp16")]; tensor var_972_equation_0 = const()[name = tensor("op_972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_972_cast_fp16 = einsum(equation = var_972_equation_0, values = (var_760_cast_fp16, var_929_cast_fp16))[name = tensor("op_972_cast_fp16")]; tensor var_974_equation_0 = const()[name = tensor("op_974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_974_cast_fp16 = einsum(equation = var_974_equation_0, values = (var_760_cast_fp16, var_930_cast_fp16))[name = tensor("op_974_cast_fp16")]; tensor var_976_equation_0 = const()[name = tensor("op_976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_976_cast_fp16 = einsum(equation = var_976_equation_0, values = (var_760_cast_fp16, var_931_cast_fp16))[name = tensor("op_976_cast_fp16")]; tensor var_978_equation_0 = const()[name = tensor("op_978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_978_cast_fp16 = einsum(equation = var_978_equation_0, values = (var_760_cast_fp16, var_932_cast_fp16))[name = tensor("op_978_cast_fp16")]; tensor var_980_equation_0 = const()[name = tensor("op_980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_980_cast_fp16 = einsum(equation = var_980_equation_0, values = (var_760_cast_fp16, var_933_cast_fp16))[name = tensor("op_980_cast_fp16")]; tensor var_982_equation_0 = const()[name = tensor("op_982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_760_cast_fp16, var_934_cast_fp16))[name = tensor("op_982_cast_fp16")]; tensor var_984_equation_0 = const()[name = tensor("op_984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_984_cast_fp16 = einsum(equation = var_984_equation_0, values = (var_764_cast_fp16, var_935_cast_fp16))[name = tensor("op_984_cast_fp16")]; tensor var_986_equation_0 = const()[name = tensor("op_986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_764_cast_fp16, var_936_cast_fp16))[name = tensor("op_986_cast_fp16")]; tensor var_988_equation_0 = const()[name = tensor("op_988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_988_cast_fp16 = einsum(equation = var_988_equation_0, values = (var_764_cast_fp16, var_937_cast_fp16))[name = tensor("op_988_cast_fp16")]; tensor var_990_equation_0 = const()[name = tensor("op_990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_764_cast_fp16, var_938_cast_fp16))[name = tensor("op_990_cast_fp16")]; tensor var_992_equation_0 = const()[name = tensor("op_992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_992_cast_fp16 = einsum(equation = var_992_equation_0, values = (var_764_cast_fp16, var_939_cast_fp16))[name = tensor("op_992_cast_fp16")]; tensor var_994_equation_0 = const()[name = tensor("op_994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_764_cast_fp16, var_940_cast_fp16))[name = tensor("op_994_cast_fp16")]; tensor var_996_equation_0 = const()[name = tensor("op_996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_996_cast_fp16 = einsum(equation = var_996_equation_0, values = (var_768_cast_fp16, var_941_cast_fp16))[name = tensor("op_996_cast_fp16")]; tensor var_998_equation_0 = const()[name = tensor("op_998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_768_cast_fp16, var_942_cast_fp16))[name = tensor("op_998_cast_fp16")]; tensor var_1000_equation_0 = const()[name = tensor("op_1000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1000_cast_fp16 = einsum(equation = var_1000_equation_0, values = (var_768_cast_fp16, var_943_cast_fp16))[name = tensor("op_1000_cast_fp16")]; tensor var_1002_equation_0 = const()[name = tensor("op_1002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_768_cast_fp16, var_944_cast_fp16))[name = tensor("op_1002_cast_fp16")]; tensor var_1004_equation_0 = const()[name = tensor("op_1004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1004_cast_fp16 = einsum(equation = var_1004_equation_0, values = (var_768_cast_fp16, var_945_cast_fp16))[name = tensor("op_1004_cast_fp16")]; tensor var_1006_equation_0 = const()[name = tensor("op_1006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1006_cast_fp16 = einsum(equation = var_1006_equation_0, values = (var_768_cast_fp16, var_946_cast_fp16))[name = tensor("op_1006_cast_fp16")]; tensor var_1008_equation_0 = const()[name = tensor("op_1008_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1008_cast_fp16 = einsum(equation = var_1008_equation_0, values = (var_772_cast_fp16, var_947_cast_fp16))[name = tensor("op_1008_cast_fp16")]; tensor var_1010_equation_0 = const()[name = tensor("op_1010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1010_cast_fp16 = einsum(equation = var_1010_equation_0, values = (var_772_cast_fp16, var_948_cast_fp16))[name = tensor("op_1010_cast_fp16")]; tensor var_1012_equation_0 = const()[name = tensor("op_1012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1012_cast_fp16 = einsum(equation = var_1012_equation_0, values = (var_772_cast_fp16, var_949_cast_fp16))[name = tensor("op_1012_cast_fp16")]; tensor var_1014_equation_0 = const()[name = tensor("op_1014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1014_cast_fp16 = einsum(equation = var_1014_equation_0, values = (var_772_cast_fp16, var_950_cast_fp16))[name = tensor("op_1014_cast_fp16")]; tensor var_1016_equation_0 = const()[name = tensor("op_1016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1016_cast_fp16 = einsum(equation = var_1016_equation_0, values = (var_772_cast_fp16, var_951_cast_fp16))[name = tensor("op_1016_cast_fp16")]; tensor var_1018_equation_0 = const()[name = tensor("op_1018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1018_cast_fp16 = einsum(equation = var_1018_equation_0, values = (var_772_cast_fp16, var_952_cast_fp16))[name = tensor("op_1018_cast_fp16")]; tensor var_1020_equation_0 = const()[name = tensor("op_1020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1020_cast_fp16 = einsum(equation = var_1020_equation_0, values = (var_776_cast_fp16, var_953_cast_fp16))[name = tensor("op_1020_cast_fp16")]; tensor var_1022_equation_0 = const()[name = tensor("op_1022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1022_cast_fp16 = einsum(equation = var_1022_equation_0, values = (var_776_cast_fp16, var_954_cast_fp16))[name = tensor("op_1022_cast_fp16")]; tensor var_1024_equation_0 = const()[name = tensor("op_1024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1024_cast_fp16 = einsum(equation = var_1024_equation_0, values = (var_776_cast_fp16, var_955_cast_fp16))[name = tensor("op_1024_cast_fp16")]; tensor var_1026_equation_0 = const()[name = tensor("op_1026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1026_cast_fp16 = einsum(equation = var_1026_equation_0, values = (var_776_cast_fp16, var_956_cast_fp16))[name = tensor("op_1026_cast_fp16")]; tensor var_1028_equation_0 = const()[name = tensor("op_1028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1028_cast_fp16 = einsum(equation = var_1028_equation_0, values = (var_776_cast_fp16, var_957_cast_fp16))[name = tensor("op_1028_cast_fp16")]; tensor var_1030_equation_0 = const()[name = tensor("op_1030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1030_cast_fp16 = einsum(equation = var_1030_equation_0, values = (var_776_cast_fp16, var_958_cast_fp16))[name = tensor("op_1030_cast_fp16")]; tensor var_1032_interleave_0 = const()[name = tensor("op_1032_interleave_0"), val = tensor(false)]; tensor var_1032_cast_fp16 = concat(axis = var_607, interleave = var_1032_interleave_0, values = (var_960_cast_fp16, var_962_cast_fp16, var_964_cast_fp16, var_966_cast_fp16, var_968_cast_fp16, var_970_cast_fp16))[name = tensor("op_1032_cast_fp16")]; tensor var_1034_interleave_0 = const()[name = tensor("op_1034_interleave_0"), val = tensor(false)]; tensor var_1034_cast_fp16 = concat(axis = var_607, interleave = var_1034_interleave_0, values = (var_972_cast_fp16, var_974_cast_fp16, var_976_cast_fp16, var_978_cast_fp16, var_980_cast_fp16, var_982_cast_fp16))[name = tensor("op_1034_cast_fp16")]; tensor var_1036_interleave_0 = const()[name = tensor("op_1036_interleave_0"), val = tensor(false)]; tensor var_1036_cast_fp16 = concat(axis = var_607, interleave = var_1036_interleave_0, values = (var_984_cast_fp16, var_986_cast_fp16, var_988_cast_fp16, var_990_cast_fp16, var_992_cast_fp16, var_994_cast_fp16))[name = tensor("op_1036_cast_fp16")]; tensor var_1038_interleave_0 = const()[name = tensor("op_1038_interleave_0"), val = tensor(false)]; tensor var_1038_cast_fp16 = concat(axis = var_607, interleave = var_1038_interleave_0, values = (var_996_cast_fp16, var_998_cast_fp16, var_1000_cast_fp16, var_1002_cast_fp16, var_1004_cast_fp16, var_1006_cast_fp16))[name = tensor("op_1038_cast_fp16")]; tensor var_1040_interleave_0 = const()[name = tensor("op_1040_interleave_0"), val = tensor(false)]; tensor var_1040_cast_fp16 = concat(axis = var_607, interleave = var_1040_interleave_0, values = (var_1008_cast_fp16, var_1010_cast_fp16, var_1012_cast_fp16, var_1014_cast_fp16, var_1016_cast_fp16, var_1018_cast_fp16))[name = tensor("op_1040_cast_fp16")]; tensor var_1042_interleave_0 = const()[name = tensor("op_1042_interleave_0"), val = tensor(false)]; tensor var_1042_cast_fp16 = concat(axis = var_607, interleave = var_1042_interleave_0, values = (var_1020_cast_fp16, var_1022_cast_fp16, var_1024_cast_fp16, var_1026_cast_fp16, var_1028_cast_fp16, var_1030_cast_fp16))[name = tensor("op_1042_cast_fp16")]; tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; tensor input_9_cast_fp16 = concat(axis = var_619, interleave = input_9_interleave_0, values = (var_1032_cast_fp16, var_1034_cast_fp16, var_1036_cast_fp16, var_1038_cast_fp16, var_1040_cast_fp16, var_1042_cast_fp16))[name = tensor("input_9_cast_fp16")]; tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("valid")]; tensor obj_7_strides_0 = const()[name = tensor("obj_7_strides_0"), val = tensor([1, 1])]; tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_7_dilations_0 = const()[name = tensor("obj_7_dilations_0"), val = tensor([1, 1])]; tensor obj_7_groups_0 = const()[name = tensor("obj_7_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6662016)))]; tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6956992)))]; tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = obj_7_dilations_0, groups = obj_7_groups_0, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = obj_7_strides_0, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; tensor var_1061_to_fp16 = const()[name = tensor("op_1061_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_1061_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6957824)))]; tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6958656)))]; tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("valid")]; tensor input_13_strides_0 = const()[name = tensor("input_13_strides_0"), val = tensor([1, 1])]; tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_13_dilations_0 = const()[name = tensor("input_13_dilations_0"), val = tensor([1, 1])]; tensor input_13_groups_0 = const()[name = tensor("input_13_groups_0"), val = tensor(1)]; tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6959488)))]; tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8139200)))]; tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("valid")]; tensor hidden_states_7_strides_0 = const()[name = tensor("hidden_states_7_strides_0"), val = tensor([1, 1])]; tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_7_dilations_0 = const()[name = tensor("hidden_states_7_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_7_groups_0 = const()[name = tensor("hidden_states_7_groups_0"), val = tensor(1)]; tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8142336)))]; tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9322048)))]; tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; tensor var_1093 = const()[name = tensor("op_1093"), val = tensor(3)]; tensor var_1105 = const()[name = tensor("op_1105"), val = tensor(1)]; tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; tensor var_1122_to_fp16 = const()[name = tensor("op_1122_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_1122_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9322880)))]; tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9323712)))]; tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("valid")]; tensor query_5_strides_0 = const()[name = tensor("query_5_strides_0"), val = tensor([1, 1])]; tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_5_dilations_0 = const()[name = tensor("query_5_dilations_0"), val = tensor([1, 1])]; tensor query_5_groups_0 = const()[name = tensor("query_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9324544)))]; tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9619520)))]; tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = query_5_dilations_0, groups = query_5_groups_0, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = query_5_strides_0, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("valid")]; tensor key_5_strides_0 = const()[name = tensor("key_5_strides_0"), val = tensor([1, 1])]; tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_5_dilations_0 = const()[name = tensor("key_5_dilations_0"), val = tensor([1, 1])]; tensor key_5_groups_0 = const()[name = tensor("key_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9620352)))]; tensor key_5_cast_fp16 = conv(dilations = key_5_dilations_0, groups = key_5_groups_0, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = key_5_strides_0, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("valid")]; tensor value_5_strides_0 = const()[name = tensor("value_5_strides_0"), val = tensor([1, 1])]; tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_5_dilations_0 = const()[name = tensor("value_5_dilations_0"), val = tensor([1, 1])]; tensor value_5_groups_0 = const()[name = tensor("value_5_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9915328)))]; tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10210304)))]; tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = value_5_dilations_0, groups = value_5_groups_0, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = value_5_strides_0, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; tensor var_1157_begin_0 = const()[name = tensor("op_1157_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1157_end_0 = const()[name = tensor("op_1157_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1157_end_mask_0 = const()[name = tensor("op_1157_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1157_cast_fp16 = slice_by_index(begin = var_1157_begin_0, end = var_1157_end_0, end_mask = var_1157_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1157_cast_fp16")]; tensor var_1161_begin_0 = const()[name = tensor("op_1161_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1161_end_0 = const()[name = tensor("op_1161_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1161_end_mask_0 = const()[name = tensor("op_1161_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1161_cast_fp16 = slice_by_index(begin = var_1161_begin_0, end = var_1161_end_0, end_mask = var_1161_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1161_cast_fp16")]; tensor var_1165_begin_0 = const()[name = tensor("op_1165_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1165_end_0 = const()[name = tensor("op_1165_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1165_end_mask_0 = const()[name = tensor("op_1165_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1165_cast_fp16 = slice_by_index(begin = var_1165_begin_0, end = var_1165_end_0, end_mask = var_1165_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1165_cast_fp16")]; tensor var_1169_begin_0 = const()[name = tensor("op_1169_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1169_end_0 = const()[name = tensor("op_1169_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1169_end_mask_0 = const()[name = tensor("op_1169_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1169_cast_fp16 = slice_by_index(begin = var_1169_begin_0, end = var_1169_end_0, end_mask = var_1169_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1169_cast_fp16")]; tensor var_1173_begin_0 = const()[name = tensor("op_1173_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1173_end_0 = const()[name = tensor("op_1173_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1173_end_mask_0 = const()[name = tensor("op_1173_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1173_cast_fp16 = slice_by_index(begin = var_1173_begin_0, end = var_1173_end_0, end_mask = var_1173_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1173_cast_fp16")]; tensor var_1177_begin_0 = const()[name = tensor("op_1177_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1177_end_0 = const()[name = tensor("op_1177_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1177_end_mask_0 = const()[name = tensor("op_1177_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1177_cast_fp16 = slice_by_index(begin = var_1177_begin_0, end = var_1177_end_0, end_mask = var_1177_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_1177_cast_fp16")]; tensor var_1180_begin_0 = const()[name = tensor("op_1180_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1180_end_0 = const()[name = tensor("op_1180_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1180_end_mask_0 = const()[name = tensor("op_1180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1180_cast_fp16 = slice_by_index(begin = var_1180_begin_0, end = var_1180_end_0, end_mask = var_1180_end_mask_0, x = var_1157_cast_fp16)[name = tensor("op_1180_cast_fp16")]; tensor var_1181_begin_0 = const()[name = tensor("op_1181_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1181_end_0 = const()[name = tensor("op_1181_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1181_end_mask_0 = const()[name = tensor("op_1181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1181_cast_fp16 = slice_by_index(begin = var_1181_begin_0, end = var_1181_end_0, end_mask = var_1181_end_mask_0, x = var_1157_cast_fp16)[name = tensor("op_1181_cast_fp16")]; tensor var_1182_begin_0 = const()[name = tensor("op_1182_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1182_end_0 = const()[name = tensor("op_1182_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1182_end_mask_0 = const()[name = tensor("op_1182_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1182_cast_fp16 = slice_by_index(begin = var_1182_begin_0, end = var_1182_end_0, end_mask = var_1182_end_mask_0, x = var_1157_cast_fp16)[name = tensor("op_1182_cast_fp16")]; tensor var_1183_begin_0 = const()[name = tensor("op_1183_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1183_end_0 = const()[name = tensor("op_1183_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1183_end_mask_0 = const()[name = tensor("op_1183_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1183_cast_fp16 = slice_by_index(begin = var_1183_begin_0, end = var_1183_end_0, end_mask = var_1183_end_mask_0, x = var_1157_cast_fp16)[name = tensor("op_1183_cast_fp16")]; tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = var_1157_cast_fp16)[name = tensor("op_1184_cast_fp16")]; tensor var_1185_begin_0 = const()[name = tensor("op_1185_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1185_end_0 = const()[name = tensor("op_1185_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1185_end_mask_0 = const()[name = tensor("op_1185_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1185_cast_fp16 = slice_by_index(begin = var_1185_begin_0, end = var_1185_end_0, end_mask = var_1185_end_mask_0, x = var_1157_cast_fp16)[name = tensor("op_1185_cast_fp16")]; tensor var_1186_begin_0 = const()[name = tensor("op_1186_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1186_end_0 = const()[name = tensor("op_1186_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1186_end_mask_0 = const()[name = tensor("op_1186_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, x = var_1161_cast_fp16)[name = tensor("op_1186_cast_fp16")]; tensor var_1187_begin_0 = const()[name = tensor("op_1187_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1187_end_0 = const()[name = tensor("op_1187_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1187_end_mask_0 = const()[name = tensor("op_1187_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1187_cast_fp16 = slice_by_index(begin = var_1187_begin_0, end = var_1187_end_0, end_mask = var_1187_end_mask_0, x = var_1161_cast_fp16)[name = tensor("op_1187_cast_fp16")]; tensor var_1188_begin_0 = const()[name = tensor("op_1188_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1188_end_0 = const()[name = tensor("op_1188_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1188_end_mask_0 = const()[name = tensor("op_1188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1188_cast_fp16 = slice_by_index(begin = var_1188_begin_0, end = var_1188_end_0, end_mask = var_1188_end_mask_0, x = var_1161_cast_fp16)[name = tensor("op_1188_cast_fp16")]; tensor var_1189_begin_0 = const()[name = tensor("op_1189_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1189_end_0 = const()[name = tensor("op_1189_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1189_end_mask_0 = const()[name = tensor("op_1189_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1189_cast_fp16 = slice_by_index(begin = var_1189_begin_0, end = var_1189_end_0, end_mask = var_1189_end_mask_0, x = var_1161_cast_fp16)[name = tensor("op_1189_cast_fp16")]; tensor var_1190_begin_0 = const()[name = tensor("op_1190_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1190_end_0 = const()[name = tensor("op_1190_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1190_end_mask_0 = const()[name = tensor("op_1190_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1190_cast_fp16 = slice_by_index(begin = var_1190_begin_0, end = var_1190_end_0, end_mask = var_1190_end_mask_0, x = var_1161_cast_fp16)[name = tensor("op_1190_cast_fp16")]; tensor var_1191_begin_0 = const()[name = tensor("op_1191_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1191_end_0 = const()[name = tensor("op_1191_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1191_end_mask_0 = const()[name = tensor("op_1191_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1191_cast_fp16 = slice_by_index(begin = var_1191_begin_0, end = var_1191_end_0, end_mask = var_1191_end_mask_0, x = var_1161_cast_fp16)[name = tensor("op_1191_cast_fp16")]; tensor var_1192_begin_0 = const()[name = tensor("op_1192_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1192_end_0 = const()[name = tensor("op_1192_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1192_end_mask_0 = const()[name = tensor("op_1192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1192_cast_fp16 = slice_by_index(begin = var_1192_begin_0, end = var_1192_end_0, end_mask = var_1192_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1192_cast_fp16")]; tensor var_1193_begin_0 = const()[name = tensor("op_1193_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1193_end_0 = const()[name = tensor("op_1193_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1193_end_mask_0 = const()[name = tensor("op_1193_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1193_cast_fp16 = slice_by_index(begin = var_1193_begin_0, end = var_1193_end_0, end_mask = var_1193_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1193_cast_fp16")]; tensor var_1194_begin_0 = const()[name = tensor("op_1194_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1194_end_0 = const()[name = tensor("op_1194_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1194_end_mask_0 = const()[name = tensor("op_1194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = var_1194_end_0, end_mask = var_1194_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1194_cast_fp16")]; tensor var_1195_begin_0 = const()[name = tensor("op_1195_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1195_end_0 = const()[name = tensor("op_1195_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1195_end_mask_0 = const()[name = tensor("op_1195_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1195_cast_fp16 = slice_by_index(begin = var_1195_begin_0, end = var_1195_end_0, end_mask = var_1195_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1195_cast_fp16")]; tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1196_cast_fp16")]; tensor var_1197_begin_0 = const()[name = tensor("op_1197_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1197_end_0 = const()[name = tensor("op_1197_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1197_end_mask_0 = const()[name = tensor("op_1197_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1197_cast_fp16 = slice_by_index(begin = var_1197_begin_0, end = var_1197_end_0, end_mask = var_1197_end_mask_0, x = var_1165_cast_fp16)[name = tensor("op_1197_cast_fp16")]; tensor var_1198_begin_0 = const()[name = tensor("op_1198_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1198_end_0 = const()[name = tensor("op_1198_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1198_end_mask_0 = const()[name = tensor("op_1198_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1198_cast_fp16 = slice_by_index(begin = var_1198_begin_0, end = var_1198_end_0, end_mask = var_1198_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1198_cast_fp16")]; tensor var_1199_begin_0 = const()[name = tensor("op_1199_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1199_end_0 = const()[name = tensor("op_1199_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1199_end_mask_0 = const()[name = tensor("op_1199_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1199_cast_fp16 = slice_by_index(begin = var_1199_begin_0, end = var_1199_end_0, end_mask = var_1199_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1199_cast_fp16")]; tensor var_1200_begin_0 = const()[name = tensor("op_1200_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1200_end_0 = const()[name = tensor("op_1200_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1200_end_mask_0 = const()[name = tensor("op_1200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1200_cast_fp16 = slice_by_index(begin = var_1200_begin_0, end = var_1200_end_0, end_mask = var_1200_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1200_cast_fp16")]; tensor var_1201_begin_0 = const()[name = tensor("op_1201_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1201_end_0 = const()[name = tensor("op_1201_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1201_end_mask_0 = const()[name = tensor("op_1201_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1201_cast_fp16 = slice_by_index(begin = var_1201_begin_0, end = var_1201_end_0, end_mask = var_1201_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1201_cast_fp16")]; tensor var_1202_begin_0 = const()[name = tensor("op_1202_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1202_end_0 = const()[name = tensor("op_1202_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1202_end_mask_0 = const()[name = tensor("op_1202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1202_cast_fp16")]; tensor var_1203_begin_0 = const()[name = tensor("op_1203_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1203_end_0 = const()[name = tensor("op_1203_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1203_end_mask_0 = const()[name = tensor("op_1203_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1203_cast_fp16 = slice_by_index(begin = var_1203_begin_0, end = var_1203_end_0, end_mask = var_1203_end_mask_0, x = var_1169_cast_fp16)[name = tensor("op_1203_cast_fp16")]; tensor var_1204_begin_0 = const()[name = tensor("op_1204_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1204_end_0 = const()[name = tensor("op_1204_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1204_end_mask_0 = const()[name = tensor("op_1204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1204_cast_fp16 = slice_by_index(begin = var_1204_begin_0, end = var_1204_end_0, end_mask = var_1204_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1204_cast_fp16")]; tensor var_1205_begin_0 = const()[name = tensor("op_1205_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1205_end_0 = const()[name = tensor("op_1205_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1205_end_mask_0 = const()[name = tensor("op_1205_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1205_cast_fp16 = slice_by_index(begin = var_1205_begin_0, end = var_1205_end_0, end_mask = var_1205_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1205_cast_fp16")]; tensor var_1206_begin_0 = const()[name = tensor("op_1206_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1206_end_0 = const()[name = tensor("op_1206_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1206_end_mask_0 = const()[name = tensor("op_1206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1206_cast_fp16 = slice_by_index(begin = var_1206_begin_0, end = var_1206_end_0, end_mask = var_1206_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1206_cast_fp16")]; tensor var_1207_begin_0 = const()[name = tensor("op_1207_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1207_end_0 = const()[name = tensor("op_1207_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1207_end_mask_0 = const()[name = tensor("op_1207_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1207_cast_fp16 = slice_by_index(begin = var_1207_begin_0, end = var_1207_end_0, end_mask = var_1207_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1207_cast_fp16")]; tensor var_1208_begin_0 = const()[name = tensor("op_1208_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1208_end_0 = const()[name = tensor("op_1208_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1208_end_mask_0 = const()[name = tensor("op_1208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1208_cast_fp16 = slice_by_index(begin = var_1208_begin_0, end = var_1208_end_0, end_mask = var_1208_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1208_cast_fp16")]; tensor var_1209_begin_0 = const()[name = tensor("op_1209_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1209_end_0 = const()[name = tensor("op_1209_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1209_end_mask_0 = const()[name = tensor("op_1209_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1209_cast_fp16 = slice_by_index(begin = var_1209_begin_0, end = var_1209_end_0, end_mask = var_1209_end_mask_0, x = var_1173_cast_fp16)[name = tensor("op_1209_cast_fp16")]; tensor var_1210_begin_0 = const()[name = tensor("op_1210_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1210_end_0 = const()[name = tensor("op_1210_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1210_end_mask_0 = const()[name = tensor("op_1210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1210_cast_fp16 = slice_by_index(begin = var_1210_begin_0, end = var_1210_end_0, end_mask = var_1210_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1210_cast_fp16")]; tensor var_1211_begin_0 = const()[name = tensor("op_1211_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1211_end_0 = const()[name = tensor("op_1211_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1211_end_mask_0 = const()[name = tensor("op_1211_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1211_cast_fp16 = slice_by_index(begin = var_1211_begin_0, end = var_1211_end_0, end_mask = var_1211_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1211_cast_fp16")]; tensor var_1212_begin_0 = const()[name = tensor("op_1212_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1212_end_0 = const()[name = tensor("op_1212_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1212_end_mask_0 = const()[name = tensor("op_1212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1212_cast_fp16 = slice_by_index(begin = var_1212_begin_0, end = var_1212_end_0, end_mask = var_1212_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1212_cast_fp16")]; tensor var_1213_begin_0 = const()[name = tensor("op_1213_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1213_end_0 = const()[name = tensor("op_1213_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1213_end_mask_0 = const()[name = tensor("op_1213_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1213_cast_fp16 = slice_by_index(begin = var_1213_begin_0, end = var_1213_end_0, end_mask = var_1213_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1213_cast_fp16")]; tensor var_1214_begin_0 = const()[name = tensor("op_1214_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1214_end_0 = const()[name = tensor("op_1214_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1214_end_mask_0 = const()[name = tensor("op_1214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1214_cast_fp16 = slice_by_index(begin = var_1214_begin_0, end = var_1214_end_0, end_mask = var_1214_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1214_cast_fp16")]; tensor var_1215_begin_0 = const()[name = tensor("op_1215_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1215_end_0 = const()[name = tensor("op_1215_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1215_end_mask_0 = const()[name = tensor("op_1215_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1215_cast_fp16 = slice_by_index(begin = var_1215_begin_0, end = var_1215_end_0, end_mask = var_1215_end_mask_0, x = var_1177_cast_fp16)[name = tensor("op_1215_cast_fp16")]; tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1220_begin_0 = const()[name = tensor("op_1220_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1220_end_0 = const()[name = tensor("op_1220_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_1220_end_mask_0 = const()[name = tensor("op_1220_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_1")]; tensor var_1220_cast_fp16 = slice_by_index(begin = var_1220_begin_0, end = var_1220_end_0, end_mask = var_1220_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1220_cast_fp16")]; tensor var_1224_begin_0 = const()[name = tensor("op_1224_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_1224_end_0 = const()[name = tensor("op_1224_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_1224_end_mask_0 = const()[name = tensor("op_1224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1224_cast_fp16 = slice_by_index(begin = var_1224_begin_0, end = var_1224_end_0, end_mask = var_1224_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1224_cast_fp16")]; tensor var_1228_begin_0 = const()[name = tensor("op_1228_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_1228_end_0 = const()[name = tensor("op_1228_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_1228_end_mask_0 = const()[name = tensor("op_1228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1228_cast_fp16 = slice_by_index(begin = var_1228_begin_0, end = var_1228_end_0, end_mask = var_1228_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1228_cast_fp16")]; tensor var_1232_begin_0 = const()[name = tensor("op_1232_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_1232_end_0 = const()[name = tensor("op_1232_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_1232_end_mask_0 = const()[name = tensor("op_1232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1232_cast_fp16 = slice_by_index(begin = var_1232_begin_0, end = var_1232_end_0, end_mask = var_1232_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1232_cast_fp16")]; tensor var_1236_begin_0 = const()[name = tensor("op_1236_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1236_end_0 = const()[name = tensor("op_1236_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_1236_end_mask_0 = const()[name = tensor("op_1236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1236_cast_fp16 = slice_by_index(begin = var_1236_begin_0, end = var_1236_end_0, end_mask = var_1236_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1236_cast_fp16")]; tensor var_1240_begin_0 = const()[name = tensor("op_1240_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_1240_end_0 = const()[name = tensor("op_1240_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_1240_end_mask_0 = const()[name = tensor("op_1240_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1240_cast_fp16 = slice_by_index(begin = var_1240_begin_0, end = var_1240_end_0, end_mask = var_1240_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_1240_cast_fp16")]; tensor var_1242_begin_0 = const()[name = tensor("op_1242_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1242_end_0 = const()[name = tensor("op_1242_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1242_end_mask_0 = const()[name = tensor("op_1242_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1242_cast_fp16 = slice_by_index(begin = var_1242_begin_0, end = var_1242_end_0, end_mask = var_1242_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1242_cast_fp16")]; tensor var_1246_begin_0 = const()[name = tensor("op_1246_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1246_end_0 = const()[name = tensor("op_1246_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1246_end_mask_0 = const()[name = tensor("op_1246_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1246_cast_fp16")]; tensor var_1250_begin_0 = const()[name = tensor("op_1250_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1250_end_0 = const()[name = tensor("op_1250_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1250_end_mask_0 = const()[name = tensor("op_1250_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1250_cast_fp16 = slice_by_index(begin = var_1250_begin_0, end = var_1250_end_0, end_mask = var_1250_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1250_cast_fp16")]; tensor var_1254_begin_0 = const()[name = tensor("op_1254_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1254_end_0 = const()[name = tensor("op_1254_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1254_end_mask_0 = const()[name = tensor("op_1254_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1254_cast_fp16 = slice_by_index(begin = var_1254_begin_0, end = var_1254_end_0, end_mask = var_1254_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1254_cast_fp16")]; tensor var_1258_begin_0 = const()[name = tensor("op_1258_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1258_end_0 = const()[name = tensor("op_1258_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1258_end_mask_0 = const()[name = tensor("op_1258_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1258_cast_fp16 = slice_by_index(begin = var_1258_begin_0, end = var_1258_end_0, end_mask = var_1258_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1258_cast_fp16")]; tensor var_1262_begin_0 = const()[name = tensor("op_1262_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1262_end_0 = const()[name = tensor("op_1262_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1262_end_mask_0 = const()[name = tensor("op_1262_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1262_cast_fp16 = slice_by_index(begin = var_1262_begin_0, end = var_1262_end_0, end_mask = var_1262_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_1262_cast_fp16")]; tensor _SplitHeadsQ__mh_w_145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_145_equation_0, values = (var_1220_cast_fp16, var_1180_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_147_equation_0, values = (var_1220_cast_fp16, var_1181_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_149_equation_0, values = (var_1220_cast_fp16, var_1182_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_151_equation_0, values = (var_1220_cast_fp16, var_1183_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_153_equation_0, values = (var_1220_cast_fp16, var_1184_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_155_equation_0, values = (var_1220_cast_fp16, var_1185_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_157_equation_0, values = (var_1224_cast_fp16, var_1186_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_159_equation_0, values = (var_1224_cast_fp16, var_1187_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_161_equation_0, values = (var_1224_cast_fp16, var_1188_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_163_equation_0, values = (var_1224_cast_fp16, var_1189_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_165_equation_0, values = (var_1224_cast_fp16, var_1190_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_167_equation_0, values = (var_1224_cast_fp16, var_1191_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_169_equation_0, values = (var_1228_cast_fp16, var_1192_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_171_equation_0, values = (var_1228_cast_fp16, var_1193_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_173_equation_0, values = (var_1228_cast_fp16, var_1194_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_175_equation_0, values = (var_1228_cast_fp16, var_1195_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_177_equation_0, values = (var_1228_cast_fp16, var_1196_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_179_equation_0, values = (var_1228_cast_fp16, var_1197_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_181_equation_0, values = (var_1232_cast_fp16, var_1198_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_183_equation_0, values = (var_1232_cast_fp16, var_1199_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_185_equation_0, values = (var_1232_cast_fp16, var_1200_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_187_equation_0, values = (var_1232_cast_fp16, var_1201_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_189_equation_0, values = (var_1232_cast_fp16, var_1202_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_191_equation_0, values = (var_1232_cast_fp16, var_1203_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_193_equation_0, values = (var_1236_cast_fp16, var_1204_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_195_equation_0, values = (var_1236_cast_fp16, var_1205_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_197_equation_0, values = (var_1236_cast_fp16, var_1206_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_199_equation_0, values = (var_1236_cast_fp16, var_1207_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_201_equation_0, values = (var_1236_cast_fp16, var_1208_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_203_equation_0, values = (var_1236_cast_fp16, var_1209_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_205_equation_0, values = (var_1240_cast_fp16, var_1210_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_207_equation_0, values = (var_1240_cast_fp16, var_1211_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_209_equation_0, values = (var_1240_cast_fp16, var_1212_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_211_equation_0, values = (var_1240_cast_fp16, var_1213_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_213_equation_0, values = (var_1240_cast_fp16, var_1214_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_215_equation_0, values = (var_1240_cast_fp16, var_1215_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_215_cast_fp16")]; tensor var_1337_to_fp16 = const()[name = tensor("op_1337_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_145_cast_fp16, y = var_1337_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; tensor var_1339_to_fp16 = const()[name = tensor("op_1339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_147_cast_fp16, y = var_1339_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; tensor var_1341_to_fp16 = const()[name = tensor("op_1341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_149_cast_fp16, y = var_1341_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; tensor var_1343_to_fp16 = const()[name = tensor("op_1343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_151_cast_fp16, y = var_1343_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; tensor var_1345_to_fp16 = const()[name = tensor("op_1345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_153_cast_fp16, y = var_1345_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; tensor var_1347_to_fp16 = const()[name = tensor("op_1347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_155_cast_fp16, y = var_1347_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; tensor var_1349_to_fp16 = const()[name = tensor("op_1349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_157_cast_fp16, y = var_1349_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; tensor var_1351_to_fp16 = const()[name = tensor("op_1351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_159_cast_fp16, y = var_1351_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; tensor var_1353_to_fp16 = const()[name = tensor("op_1353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_161_cast_fp16, y = var_1353_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; tensor var_1355_to_fp16 = const()[name = tensor("op_1355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_163_cast_fp16, y = var_1355_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; tensor var_1357_to_fp16 = const()[name = tensor("op_1357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_165_cast_fp16, y = var_1357_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; tensor var_1359_to_fp16 = const()[name = tensor("op_1359_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_167_cast_fp16, y = var_1359_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; tensor var_1361_to_fp16 = const()[name = tensor("op_1361_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_169_cast_fp16, y = var_1361_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; tensor var_1363_to_fp16 = const()[name = tensor("op_1363_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_171_cast_fp16, y = var_1363_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; tensor var_1365_to_fp16 = const()[name = tensor("op_1365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_173_cast_fp16, y = var_1365_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; tensor var_1367_to_fp16 = const()[name = tensor("op_1367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_175_cast_fp16, y = var_1367_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; tensor var_1369_to_fp16 = const()[name = tensor("op_1369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_177_cast_fp16, y = var_1369_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; tensor var_1371_to_fp16 = const()[name = tensor("op_1371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_179_cast_fp16, y = var_1371_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; tensor var_1373_to_fp16 = const()[name = tensor("op_1373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_181_cast_fp16, y = var_1373_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; tensor var_1375_to_fp16 = const()[name = tensor("op_1375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_183_cast_fp16, y = var_1375_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; tensor var_1377_to_fp16 = const()[name = tensor("op_1377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_185_cast_fp16, y = var_1377_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; tensor var_1379_to_fp16 = const()[name = tensor("op_1379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_187_cast_fp16, y = var_1379_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; tensor var_1381_to_fp16 = const()[name = tensor("op_1381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_189_cast_fp16, y = var_1381_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; tensor var_1383_to_fp16 = const()[name = tensor("op_1383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_191_cast_fp16, y = var_1383_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; tensor var_1385_to_fp16 = const()[name = tensor("op_1385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_193_cast_fp16, y = var_1385_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; tensor var_1387_to_fp16 = const()[name = tensor("op_1387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_195_cast_fp16, y = var_1387_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; tensor var_1389_to_fp16 = const()[name = tensor("op_1389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_197_cast_fp16, y = var_1389_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; tensor var_1391_to_fp16 = const()[name = tensor("op_1391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_199_cast_fp16, y = var_1391_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; tensor var_1393_to_fp16 = const()[name = tensor("op_1393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_201_cast_fp16, y = var_1393_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; tensor var_1395_to_fp16 = const()[name = tensor("op_1395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_203_cast_fp16, y = var_1395_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; tensor var_1397_to_fp16 = const()[name = tensor("op_1397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_205_cast_fp16, y = var_1397_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; tensor var_1399_to_fp16 = const()[name = tensor("op_1399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_207_cast_fp16, y = var_1399_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; tensor var_1401_to_fp16 = const()[name = tensor("op_1401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_209_cast_fp16, y = var_1401_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; tensor var_1403_to_fp16 = const()[name = tensor("op_1403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_211_cast_fp16, y = var_1403_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; tensor var_1405_to_fp16 = const()[name = tensor("op_1405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_213_cast_fp16, y = var_1405_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; tensor var_1407_to_fp16 = const()[name = tensor("op_1407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_215_cast_fp16, y = var_1407_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; tensor var_1409_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_145_cast_fp16)[name = tensor("op_1409_cast_fp16")]; tensor var_1410_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_147_cast_fp16)[name = tensor("op_1410_cast_fp16")]; tensor var_1411_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_149_cast_fp16)[name = tensor("op_1411_cast_fp16")]; tensor var_1412_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_151_cast_fp16)[name = tensor("op_1412_cast_fp16")]; tensor var_1413_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_153_cast_fp16)[name = tensor("op_1413_cast_fp16")]; tensor var_1414_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_155_cast_fp16)[name = tensor("op_1414_cast_fp16")]; tensor var_1415_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_157_cast_fp16)[name = tensor("op_1415_cast_fp16")]; tensor var_1416_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_159_cast_fp16)[name = tensor("op_1416_cast_fp16")]; tensor var_1417_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_161_cast_fp16)[name = tensor("op_1417_cast_fp16")]; tensor var_1418_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_163_cast_fp16)[name = tensor("op_1418_cast_fp16")]; tensor var_1419_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_165_cast_fp16)[name = tensor("op_1419_cast_fp16")]; tensor var_1420_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_167_cast_fp16)[name = tensor("op_1420_cast_fp16")]; tensor var_1421_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_169_cast_fp16)[name = tensor("op_1421_cast_fp16")]; tensor var_1422_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_171_cast_fp16)[name = tensor("op_1422_cast_fp16")]; tensor var_1423_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_173_cast_fp16)[name = tensor("op_1423_cast_fp16")]; tensor var_1424_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_175_cast_fp16)[name = tensor("op_1424_cast_fp16")]; tensor var_1425_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_177_cast_fp16)[name = tensor("op_1425_cast_fp16")]; tensor var_1426_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_179_cast_fp16)[name = tensor("op_1426_cast_fp16")]; tensor var_1427_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_181_cast_fp16)[name = tensor("op_1427_cast_fp16")]; tensor var_1428_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_183_cast_fp16)[name = tensor("op_1428_cast_fp16")]; tensor var_1429_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_185_cast_fp16)[name = tensor("op_1429_cast_fp16")]; tensor var_1430_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_187_cast_fp16)[name = tensor("op_1430_cast_fp16")]; tensor var_1431_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_189_cast_fp16)[name = tensor("op_1431_cast_fp16")]; tensor var_1432_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_191_cast_fp16)[name = tensor("op_1432_cast_fp16")]; tensor var_1433_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_193_cast_fp16)[name = tensor("op_1433_cast_fp16")]; tensor var_1434_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_195_cast_fp16)[name = tensor("op_1434_cast_fp16")]; tensor var_1435_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_197_cast_fp16)[name = tensor("op_1435_cast_fp16")]; tensor var_1436_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_199_cast_fp16)[name = tensor("op_1436_cast_fp16")]; tensor var_1437_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_201_cast_fp16)[name = tensor("op_1437_cast_fp16")]; tensor var_1438_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_203_cast_fp16)[name = tensor("op_1438_cast_fp16")]; tensor var_1439_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_205_cast_fp16)[name = tensor("op_1439_cast_fp16")]; tensor var_1440_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_207_cast_fp16)[name = tensor("op_1440_cast_fp16")]; tensor var_1441_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_209_cast_fp16)[name = tensor("op_1441_cast_fp16")]; tensor var_1442_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_211_cast_fp16)[name = tensor("op_1442_cast_fp16")]; tensor var_1443_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_213_cast_fp16)[name = tensor("op_1443_cast_fp16")]; tensor var_1444_cast_fp16 = softmax(axis = var_1105, x = aw_chunk_215_cast_fp16)[name = tensor("op_1444_cast_fp16")]; tensor var_1446_equation_0 = const()[name = tensor("op_1446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1446_cast_fp16 = einsum(equation = var_1446_equation_0, values = (var_1242_cast_fp16, var_1409_cast_fp16))[name = tensor("op_1446_cast_fp16")]; tensor var_1448_equation_0 = const()[name = tensor("op_1448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1448_cast_fp16 = einsum(equation = var_1448_equation_0, values = (var_1242_cast_fp16, var_1410_cast_fp16))[name = tensor("op_1448_cast_fp16")]; tensor var_1450_equation_0 = const()[name = tensor("op_1450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1450_cast_fp16 = einsum(equation = var_1450_equation_0, values = (var_1242_cast_fp16, var_1411_cast_fp16))[name = tensor("op_1450_cast_fp16")]; tensor var_1452_equation_0 = const()[name = tensor("op_1452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1452_cast_fp16 = einsum(equation = var_1452_equation_0, values = (var_1242_cast_fp16, var_1412_cast_fp16))[name = tensor("op_1452_cast_fp16")]; tensor var_1454_equation_0 = const()[name = tensor("op_1454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1454_cast_fp16 = einsum(equation = var_1454_equation_0, values = (var_1242_cast_fp16, var_1413_cast_fp16))[name = tensor("op_1454_cast_fp16")]; tensor var_1456_equation_0 = const()[name = tensor("op_1456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1456_cast_fp16 = einsum(equation = var_1456_equation_0, values = (var_1242_cast_fp16, var_1414_cast_fp16))[name = tensor("op_1456_cast_fp16")]; tensor var_1458_equation_0 = const()[name = tensor("op_1458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1458_cast_fp16 = einsum(equation = var_1458_equation_0, values = (var_1246_cast_fp16, var_1415_cast_fp16))[name = tensor("op_1458_cast_fp16")]; tensor var_1460_equation_0 = const()[name = tensor("op_1460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1460_cast_fp16 = einsum(equation = var_1460_equation_0, values = (var_1246_cast_fp16, var_1416_cast_fp16))[name = tensor("op_1460_cast_fp16")]; tensor var_1462_equation_0 = const()[name = tensor("op_1462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1462_cast_fp16 = einsum(equation = var_1462_equation_0, values = (var_1246_cast_fp16, var_1417_cast_fp16))[name = tensor("op_1462_cast_fp16")]; tensor var_1464_equation_0 = const()[name = tensor("op_1464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1464_cast_fp16 = einsum(equation = var_1464_equation_0, values = (var_1246_cast_fp16, var_1418_cast_fp16))[name = tensor("op_1464_cast_fp16")]; tensor var_1466_equation_0 = const()[name = tensor("op_1466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1466_cast_fp16 = einsum(equation = var_1466_equation_0, values = (var_1246_cast_fp16, var_1419_cast_fp16))[name = tensor("op_1466_cast_fp16")]; tensor var_1468_equation_0 = const()[name = tensor("op_1468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1468_cast_fp16 = einsum(equation = var_1468_equation_0, values = (var_1246_cast_fp16, var_1420_cast_fp16))[name = tensor("op_1468_cast_fp16")]; tensor var_1470_equation_0 = const()[name = tensor("op_1470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1470_cast_fp16 = einsum(equation = var_1470_equation_0, values = (var_1250_cast_fp16, var_1421_cast_fp16))[name = tensor("op_1470_cast_fp16")]; tensor var_1472_equation_0 = const()[name = tensor("op_1472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1472_cast_fp16 = einsum(equation = var_1472_equation_0, values = (var_1250_cast_fp16, var_1422_cast_fp16))[name = tensor("op_1472_cast_fp16")]; tensor var_1474_equation_0 = const()[name = tensor("op_1474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1250_cast_fp16, var_1423_cast_fp16))[name = tensor("op_1474_cast_fp16")]; tensor var_1476_equation_0 = const()[name = tensor("op_1476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1250_cast_fp16, var_1424_cast_fp16))[name = tensor("op_1476_cast_fp16")]; tensor var_1478_equation_0 = const()[name = tensor("op_1478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1478_cast_fp16 = einsum(equation = var_1478_equation_0, values = (var_1250_cast_fp16, var_1425_cast_fp16))[name = tensor("op_1478_cast_fp16")]; tensor var_1480_equation_0 = const()[name = tensor("op_1480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1480_cast_fp16 = einsum(equation = var_1480_equation_0, values = (var_1250_cast_fp16, var_1426_cast_fp16))[name = tensor("op_1480_cast_fp16")]; tensor var_1482_equation_0 = const()[name = tensor("op_1482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1482_cast_fp16 = einsum(equation = var_1482_equation_0, values = (var_1254_cast_fp16, var_1427_cast_fp16))[name = tensor("op_1482_cast_fp16")]; tensor var_1484_equation_0 = const()[name = tensor("op_1484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1484_cast_fp16 = einsum(equation = var_1484_equation_0, values = (var_1254_cast_fp16, var_1428_cast_fp16))[name = tensor("op_1484_cast_fp16")]; tensor var_1486_equation_0 = const()[name = tensor("op_1486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1486_cast_fp16 = einsum(equation = var_1486_equation_0, values = (var_1254_cast_fp16, var_1429_cast_fp16))[name = tensor("op_1486_cast_fp16")]; tensor var_1488_equation_0 = const()[name = tensor("op_1488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1488_cast_fp16 = einsum(equation = var_1488_equation_0, values = (var_1254_cast_fp16, var_1430_cast_fp16))[name = tensor("op_1488_cast_fp16")]; tensor var_1490_equation_0 = const()[name = tensor("op_1490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1490_cast_fp16 = einsum(equation = var_1490_equation_0, values = (var_1254_cast_fp16, var_1431_cast_fp16))[name = tensor("op_1490_cast_fp16")]; tensor var_1492_equation_0 = const()[name = tensor("op_1492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1492_cast_fp16 = einsum(equation = var_1492_equation_0, values = (var_1254_cast_fp16, var_1432_cast_fp16))[name = tensor("op_1492_cast_fp16")]; tensor var_1494_equation_0 = const()[name = tensor("op_1494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1494_cast_fp16 = einsum(equation = var_1494_equation_0, values = (var_1258_cast_fp16, var_1433_cast_fp16))[name = tensor("op_1494_cast_fp16")]; tensor var_1496_equation_0 = const()[name = tensor("op_1496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1496_cast_fp16 = einsum(equation = var_1496_equation_0, values = (var_1258_cast_fp16, var_1434_cast_fp16))[name = tensor("op_1496_cast_fp16")]; tensor var_1498_equation_0 = const()[name = tensor("op_1498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1498_cast_fp16 = einsum(equation = var_1498_equation_0, values = (var_1258_cast_fp16, var_1435_cast_fp16))[name = tensor("op_1498_cast_fp16")]; tensor var_1500_equation_0 = const()[name = tensor("op_1500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1500_cast_fp16 = einsum(equation = var_1500_equation_0, values = (var_1258_cast_fp16, var_1436_cast_fp16))[name = tensor("op_1500_cast_fp16")]; tensor var_1502_equation_0 = const()[name = tensor("op_1502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1502_cast_fp16 = einsum(equation = var_1502_equation_0, values = (var_1258_cast_fp16, var_1437_cast_fp16))[name = tensor("op_1502_cast_fp16")]; tensor var_1504_equation_0 = const()[name = tensor("op_1504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1504_cast_fp16 = einsum(equation = var_1504_equation_0, values = (var_1258_cast_fp16, var_1438_cast_fp16))[name = tensor("op_1504_cast_fp16")]; tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1262_cast_fp16, var_1439_cast_fp16))[name = tensor("op_1506_cast_fp16")]; tensor var_1508_equation_0 = const()[name = tensor("op_1508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1508_cast_fp16 = einsum(equation = var_1508_equation_0, values = (var_1262_cast_fp16, var_1440_cast_fp16))[name = tensor("op_1508_cast_fp16")]; tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1262_cast_fp16, var_1441_cast_fp16))[name = tensor("op_1510_cast_fp16")]; tensor var_1512_equation_0 = const()[name = tensor("op_1512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1512_cast_fp16 = einsum(equation = var_1512_equation_0, values = (var_1262_cast_fp16, var_1442_cast_fp16))[name = tensor("op_1512_cast_fp16")]; tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1262_cast_fp16, var_1443_cast_fp16))[name = tensor("op_1514_cast_fp16")]; tensor var_1516_equation_0 = const()[name = tensor("op_1516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1516_cast_fp16 = einsum(equation = var_1516_equation_0, values = (var_1262_cast_fp16, var_1444_cast_fp16))[name = tensor("op_1516_cast_fp16")]; tensor var_1518_interleave_0 = const()[name = tensor("op_1518_interleave_0"), val = tensor(false)]; tensor var_1518_cast_fp16 = concat(axis = var_1093, interleave = var_1518_interleave_0, values = (var_1446_cast_fp16, var_1448_cast_fp16, var_1450_cast_fp16, var_1452_cast_fp16, var_1454_cast_fp16, var_1456_cast_fp16))[name = tensor("op_1518_cast_fp16")]; tensor var_1520_interleave_0 = const()[name = tensor("op_1520_interleave_0"), val = tensor(false)]; tensor var_1520_cast_fp16 = concat(axis = var_1093, interleave = var_1520_interleave_0, values = (var_1458_cast_fp16, var_1460_cast_fp16, var_1462_cast_fp16, var_1464_cast_fp16, var_1466_cast_fp16, var_1468_cast_fp16))[name = tensor("op_1520_cast_fp16")]; tensor var_1522_interleave_0 = const()[name = tensor("op_1522_interleave_0"), val = tensor(false)]; tensor var_1522_cast_fp16 = concat(axis = var_1093, interleave = var_1522_interleave_0, values = (var_1470_cast_fp16, var_1472_cast_fp16, var_1474_cast_fp16, var_1476_cast_fp16, var_1478_cast_fp16, var_1480_cast_fp16))[name = tensor("op_1522_cast_fp16")]; tensor var_1524_interleave_0 = const()[name = tensor("op_1524_interleave_0"), val = tensor(false)]; tensor var_1524_cast_fp16 = concat(axis = var_1093, interleave = var_1524_interleave_0, values = (var_1482_cast_fp16, var_1484_cast_fp16, var_1486_cast_fp16, var_1488_cast_fp16, var_1490_cast_fp16, var_1492_cast_fp16))[name = tensor("op_1524_cast_fp16")]; tensor var_1526_interleave_0 = const()[name = tensor("op_1526_interleave_0"), val = tensor(false)]; tensor var_1526_cast_fp16 = concat(axis = var_1093, interleave = var_1526_interleave_0, values = (var_1494_cast_fp16, var_1496_cast_fp16, var_1498_cast_fp16, var_1500_cast_fp16, var_1502_cast_fp16, var_1504_cast_fp16))[name = tensor("op_1526_cast_fp16")]; tensor var_1528_interleave_0 = const()[name = tensor("op_1528_interleave_0"), val = tensor(false)]; tensor var_1528_cast_fp16 = concat(axis = var_1093, interleave = var_1528_interleave_0, values = (var_1506_cast_fp16, var_1508_cast_fp16, var_1510_cast_fp16, var_1512_cast_fp16, var_1514_cast_fp16, var_1516_cast_fp16))[name = tensor("op_1528_cast_fp16")]; tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; tensor input_17_cast_fp16 = concat(axis = var_1105, interleave = input_17_interleave_0, values = (var_1518_cast_fp16, var_1520_cast_fp16, var_1522_cast_fp16, var_1524_cast_fp16, var_1526_cast_fp16, var_1528_cast_fp16))[name = tensor("input_17_cast_fp16")]; tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("valid")]; tensor obj_11_strides_0 = const()[name = tensor("obj_11_strides_0"), val = tensor([1, 1])]; tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_11_dilations_0 = const()[name = tensor("obj_11_dilations_0"), val = tensor([1, 1])]; tensor obj_11_groups_0 = const()[name = tensor("obj_11_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10211136)))]; tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10506112)))]; tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = obj_11_dilations_0, groups = obj_11_groups_0, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = obj_11_strides_0, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; tensor var_1547_to_fp16 = const()[name = tensor("op_1547_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_1547_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10506944)))]; tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10507776)))]; tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("valid")]; tensor input_21_strides_0 = const()[name = tensor("input_21_strides_0"), val = tensor([1, 1])]; tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_21_dilations_0 = const()[name = tensor("input_21_dilations_0"), val = tensor([1, 1])]; tensor input_21_groups_0 = const()[name = tensor("input_21_groups_0"), val = tensor(1)]; tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10508608)))]; tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11688320)))]; tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("valid")]; tensor hidden_states_9_strides_0 = const()[name = tensor("hidden_states_9_strides_0"), val = tensor([1, 1])]; tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_9_dilations_0 = const()[name = tensor("hidden_states_9_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_9_groups_0 = const()[name = tensor("hidden_states_9_groups_0"), val = tensor(1)]; tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11691456)))]; tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12871168)))]; tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = hidden_states_9_dilations_0, groups = hidden_states_9_groups_0, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = hidden_states_9_strides_0, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; tensor var_1579 = const()[name = tensor("op_1579"), val = tensor(3)]; tensor var_1591 = const()[name = tensor("op_1591"), val = tensor(1)]; tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; tensor var_1608_to_fp16 = const()[name = tensor("op_1608_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_1608_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12872000)))]; tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12872832)))]; tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("valid")]; tensor query_strides_0 = const()[name = tensor("query_strides_0"), val = tensor([1, 1])]; tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; tensor query_dilations_0 = const()[name = tensor("query_dilations_0"), val = tensor([1, 1])]; tensor query_groups_0 = const()[name = tensor("query_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12873664)))]; tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13168640)))]; tensor query_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = query_dilations_0, groups = query_groups_0, pad = query_pad_0, pad_type = query_pad_type_0, strides = query_strides_0, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_cast_fp16")]; tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("valid")]; tensor key_strides_0 = const()[name = tensor("key_strides_0"), val = tensor([1, 1])]; tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; tensor key_dilations_0 = const()[name = tensor("key_dilations_0"), val = tensor([1, 1])]; tensor key_groups_0 = const()[name = tensor("key_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13169472)))]; tensor key_cast_fp16 = conv(dilations = key_dilations_0, groups = key_groups_0, pad = key_pad_0, pad_type = key_pad_type_0, strides = key_strides_0, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_cast_fp16")]; tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("valid")]; tensor value_strides_0 = const()[name = tensor("value_strides_0"), val = tensor([1, 1])]; tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; tensor value_dilations_0 = const()[name = tensor("value_dilations_0"), val = tensor([1, 1])]; tensor value_groups_0 = const()[name = tensor("value_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13464448)))]; tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13759424)))]; tensor value_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = value_dilations_0, groups = value_groups_0, pad = value_pad_0, pad_type = value_pad_type_0, strides = value_strides_0, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_cast_fp16")]; tensor var_1643_begin_0 = const()[name = tensor("op_1643_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1643_end_0 = const()[name = tensor("op_1643_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1643_end_mask_0 = const()[name = tensor("op_1643_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1643_cast_fp16 = slice_by_index(begin = var_1643_begin_0, end = var_1643_end_0, end_mask = var_1643_end_mask_0, x = query_cast_fp16)[name = tensor("op_1643_cast_fp16")]; tensor var_1647_begin_0 = const()[name = tensor("op_1647_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1647_end_0 = const()[name = tensor("op_1647_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1647_end_mask_0 = const()[name = tensor("op_1647_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1647_cast_fp16 = slice_by_index(begin = var_1647_begin_0, end = var_1647_end_0, end_mask = var_1647_end_mask_0, x = query_cast_fp16)[name = tensor("op_1647_cast_fp16")]; tensor var_1651_begin_0 = const()[name = tensor("op_1651_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1651_end_0 = const()[name = tensor("op_1651_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1651_end_mask_0 = const()[name = tensor("op_1651_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1651_cast_fp16 = slice_by_index(begin = var_1651_begin_0, end = var_1651_end_0, end_mask = var_1651_end_mask_0, x = query_cast_fp16)[name = tensor("op_1651_cast_fp16")]; tensor var_1655_begin_0 = const()[name = tensor("op_1655_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1655_end_0 = const()[name = tensor("op_1655_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1655_end_mask_0 = const()[name = tensor("op_1655_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1655_cast_fp16 = slice_by_index(begin = var_1655_begin_0, end = var_1655_end_0, end_mask = var_1655_end_mask_0, x = query_cast_fp16)[name = tensor("op_1655_cast_fp16")]; tensor var_1659_begin_0 = const()[name = tensor("op_1659_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1659_end_0 = const()[name = tensor("op_1659_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1659_end_mask_0 = const()[name = tensor("op_1659_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1659_cast_fp16 = slice_by_index(begin = var_1659_begin_0, end = var_1659_end_0, end_mask = var_1659_end_mask_0, x = query_cast_fp16)[name = tensor("op_1659_cast_fp16")]; tensor var_1663_begin_0 = const()[name = tensor("op_1663_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1663_end_0 = const()[name = tensor("op_1663_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1663_end_mask_0 = const()[name = tensor("op_1663_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1663_cast_fp16 = slice_by_index(begin = var_1663_begin_0, end = var_1663_end_0, end_mask = var_1663_end_mask_0, x = query_cast_fp16)[name = tensor("op_1663_cast_fp16")]; tensor var_1666_begin_0 = const()[name = tensor("op_1666_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1666_end_0 = const()[name = tensor("op_1666_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1666_end_mask_0 = const()[name = tensor("op_1666_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1666_cast_fp16 = slice_by_index(begin = var_1666_begin_0, end = var_1666_end_0, end_mask = var_1666_end_mask_0, x = var_1643_cast_fp16)[name = tensor("op_1666_cast_fp16")]; tensor var_1667_begin_0 = const()[name = tensor("op_1667_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1667_end_0 = const()[name = tensor("op_1667_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1667_end_mask_0 = const()[name = tensor("op_1667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1667_cast_fp16 = slice_by_index(begin = var_1667_begin_0, end = var_1667_end_0, end_mask = var_1667_end_mask_0, x = var_1643_cast_fp16)[name = tensor("op_1667_cast_fp16")]; tensor var_1668_begin_0 = const()[name = tensor("op_1668_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1668_end_0 = const()[name = tensor("op_1668_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1668_end_mask_0 = const()[name = tensor("op_1668_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1668_cast_fp16 = slice_by_index(begin = var_1668_begin_0, end = var_1668_end_0, end_mask = var_1668_end_mask_0, x = var_1643_cast_fp16)[name = tensor("op_1668_cast_fp16")]; tensor var_1669_begin_0 = const()[name = tensor("op_1669_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1669_end_0 = const()[name = tensor("op_1669_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1669_end_mask_0 = const()[name = tensor("op_1669_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1669_cast_fp16 = slice_by_index(begin = var_1669_begin_0, end = var_1669_end_0, end_mask = var_1669_end_mask_0, x = var_1643_cast_fp16)[name = tensor("op_1669_cast_fp16")]; tensor var_1670_begin_0 = const()[name = tensor("op_1670_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1670_end_0 = const()[name = tensor("op_1670_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1670_end_mask_0 = const()[name = tensor("op_1670_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1670_cast_fp16 = slice_by_index(begin = var_1670_begin_0, end = var_1670_end_0, end_mask = var_1670_end_mask_0, x = var_1643_cast_fp16)[name = tensor("op_1670_cast_fp16")]; tensor var_1671_begin_0 = const()[name = tensor("op_1671_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1671_end_0 = const()[name = tensor("op_1671_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1671_end_mask_0 = const()[name = tensor("op_1671_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1671_cast_fp16 = slice_by_index(begin = var_1671_begin_0, end = var_1671_end_0, end_mask = var_1671_end_mask_0, x = var_1643_cast_fp16)[name = tensor("op_1671_cast_fp16")]; tensor var_1672_begin_0 = const()[name = tensor("op_1672_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1672_end_0 = const()[name = tensor("op_1672_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1672_end_mask_0 = const()[name = tensor("op_1672_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1672_cast_fp16 = slice_by_index(begin = var_1672_begin_0, end = var_1672_end_0, end_mask = var_1672_end_mask_0, x = var_1647_cast_fp16)[name = tensor("op_1672_cast_fp16")]; tensor var_1673_begin_0 = const()[name = tensor("op_1673_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1673_end_0 = const()[name = tensor("op_1673_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1673_end_mask_0 = const()[name = tensor("op_1673_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1673_cast_fp16 = slice_by_index(begin = var_1673_begin_0, end = var_1673_end_0, end_mask = var_1673_end_mask_0, x = var_1647_cast_fp16)[name = tensor("op_1673_cast_fp16")]; tensor var_1674_begin_0 = const()[name = tensor("op_1674_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1674_end_0 = const()[name = tensor("op_1674_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1674_end_mask_0 = const()[name = tensor("op_1674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1674_cast_fp16 = slice_by_index(begin = var_1674_begin_0, end = var_1674_end_0, end_mask = var_1674_end_mask_0, x = var_1647_cast_fp16)[name = tensor("op_1674_cast_fp16")]; tensor var_1675_begin_0 = const()[name = tensor("op_1675_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1675_end_0 = const()[name = tensor("op_1675_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1675_end_mask_0 = const()[name = tensor("op_1675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1675_cast_fp16 = slice_by_index(begin = var_1675_begin_0, end = var_1675_end_0, end_mask = var_1675_end_mask_0, x = var_1647_cast_fp16)[name = tensor("op_1675_cast_fp16")]; tensor var_1676_begin_0 = const()[name = tensor("op_1676_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1676_end_0 = const()[name = tensor("op_1676_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1676_end_mask_0 = const()[name = tensor("op_1676_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1676_cast_fp16 = slice_by_index(begin = var_1676_begin_0, end = var_1676_end_0, end_mask = var_1676_end_mask_0, x = var_1647_cast_fp16)[name = tensor("op_1676_cast_fp16")]; tensor var_1677_begin_0 = const()[name = tensor("op_1677_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1677_end_0 = const()[name = tensor("op_1677_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1677_end_mask_0 = const()[name = tensor("op_1677_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1677_cast_fp16 = slice_by_index(begin = var_1677_begin_0, end = var_1677_end_0, end_mask = var_1677_end_mask_0, x = var_1647_cast_fp16)[name = tensor("op_1677_cast_fp16")]; tensor var_1678_begin_0 = const()[name = tensor("op_1678_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1678_end_0 = const()[name = tensor("op_1678_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1678_end_mask_0 = const()[name = tensor("op_1678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1678_cast_fp16 = slice_by_index(begin = var_1678_begin_0, end = var_1678_end_0, end_mask = var_1678_end_mask_0, x = var_1651_cast_fp16)[name = tensor("op_1678_cast_fp16")]; tensor var_1679_begin_0 = const()[name = tensor("op_1679_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1679_end_0 = const()[name = tensor("op_1679_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1679_end_mask_0 = const()[name = tensor("op_1679_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1679_cast_fp16 = slice_by_index(begin = var_1679_begin_0, end = var_1679_end_0, end_mask = var_1679_end_mask_0, x = var_1651_cast_fp16)[name = tensor("op_1679_cast_fp16")]; tensor var_1680_begin_0 = const()[name = tensor("op_1680_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1680_end_0 = const()[name = tensor("op_1680_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1680_end_mask_0 = const()[name = tensor("op_1680_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1680_cast_fp16 = slice_by_index(begin = var_1680_begin_0, end = var_1680_end_0, end_mask = var_1680_end_mask_0, x = var_1651_cast_fp16)[name = tensor("op_1680_cast_fp16")]; tensor var_1681_begin_0 = const()[name = tensor("op_1681_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1681_end_0 = const()[name = tensor("op_1681_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1681_end_mask_0 = const()[name = tensor("op_1681_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1681_cast_fp16 = slice_by_index(begin = var_1681_begin_0, end = var_1681_end_0, end_mask = var_1681_end_mask_0, x = var_1651_cast_fp16)[name = tensor("op_1681_cast_fp16")]; tensor var_1682_begin_0 = const()[name = tensor("op_1682_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1682_end_0 = const()[name = tensor("op_1682_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1682_end_mask_0 = const()[name = tensor("op_1682_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1682_cast_fp16 = slice_by_index(begin = var_1682_begin_0, end = var_1682_end_0, end_mask = var_1682_end_mask_0, x = var_1651_cast_fp16)[name = tensor("op_1682_cast_fp16")]; tensor var_1683_begin_0 = const()[name = tensor("op_1683_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1683_end_0 = const()[name = tensor("op_1683_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1683_end_mask_0 = const()[name = tensor("op_1683_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1683_cast_fp16 = slice_by_index(begin = var_1683_begin_0, end = var_1683_end_0, end_mask = var_1683_end_mask_0, x = var_1651_cast_fp16)[name = tensor("op_1683_cast_fp16")]; tensor var_1684_begin_0 = const()[name = tensor("op_1684_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1684_end_0 = const()[name = tensor("op_1684_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1684_end_mask_0 = const()[name = tensor("op_1684_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1684_cast_fp16 = slice_by_index(begin = var_1684_begin_0, end = var_1684_end_0, end_mask = var_1684_end_mask_0, x = var_1655_cast_fp16)[name = tensor("op_1684_cast_fp16")]; tensor var_1685_begin_0 = const()[name = tensor("op_1685_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1685_end_0 = const()[name = tensor("op_1685_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1685_end_mask_0 = const()[name = tensor("op_1685_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1685_cast_fp16 = slice_by_index(begin = var_1685_begin_0, end = var_1685_end_0, end_mask = var_1685_end_mask_0, x = var_1655_cast_fp16)[name = tensor("op_1685_cast_fp16")]; tensor var_1686_begin_0 = const()[name = tensor("op_1686_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1686_end_0 = const()[name = tensor("op_1686_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1686_end_mask_0 = const()[name = tensor("op_1686_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1686_cast_fp16 = slice_by_index(begin = var_1686_begin_0, end = var_1686_end_0, end_mask = var_1686_end_mask_0, x = var_1655_cast_fp16)[name = tensor("op_1686_cast_fp16")]; tensor var_1687_begin_0 = const()[name = tensor("op_1687_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1687_end_0 = const()[name = tensor("op_1687_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1687_end_mask_0 = const()[name = tensor("op_1687_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1687_cast_fp16 = slice_by_index(begin = var_1687_begin_0, end = var_1687_end_0, end_mask = var_1687_end_mask_0, x = var_1655_cast_fp16)[name = tensor("op_1687_cast_fp16")]; tensor var_1688_begin_0 = const()[name = tensor("op_1688_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1688_end_0 = const()[name = tensor("op_1688_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1688_end_mask_0 = const()[name = tensor("op_1688_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1688_cast_fp16 = slice_by_index(begin = var_1688_begin_0, end = var_1688_end_0, end_mask = var_1688_end_mask_0, x = var_1655_cast_fp16)[name = tensor("op_1688_cast_fp16")]; tensor var_1689_begin_0 = const()[name = tensor("op_1689_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1689_end_0 = const()[name = tensor("op_1689_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1689_end_mask_0 = const()[name = tensor("op_1689_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1689_cast_fp16 = slice_by_index(begin = var_1689_begin_0, end = var_1689_end_0, end_mask = var_1689_end_mask_0, x = var_1655_cast_fp16)[name = tensor("op_1689_cast_fp16")]; tensor var_1690_begin_0 = const()[name = tensor("op_1690_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1690_end_0 = const()[name = tensor("op_1690_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1690_end_mask_0 = const()[name = tensor("op_1690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1690_cast_fp16 = slice_by_index(begin = var_1690_begin_0, end = var_1690_end_0, end_mask = var_1690_end_mask_0, x = var_1659_cast_fp16)[name = tensor("op_1690_cast_fp16")]; tensor var_1691_begin_0 = const()[name = tensor("op_1691_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1691_end_0 = const()[name = tensor("op_1691_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1691_end_mask_0 = const()[name = tensor("op_1691_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1691_cast_fp16 = slice_by_index(begin = var_1691_begin_0, end = var_1691_end_0, end_mask = var_1691_end_mask_0, x = var_1659_cast_fp16)[name = tensor("op_1691_cast_fp16")]; tensor var_1692_begin_0 = const()[name = tensor("op_1692_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1692_end_0 = const()[name = tensor("op_1692_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1692_end_mask_0 = const()[name = tensor("op_1692_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1692_cast_fp16 = slice_by_index(begin = var_1692_begin_0, end = var_1692_end_0, end_mask = var_1692_end_mask_0, x = var_1659_cast_fp16)[name = tensor("op_1692_cast_fp16")]; tensor var_1693_begin_0 = const()[name = tensor("op_1693_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1693_end_0 = const()[name = tensor("op_1693_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1693_end_mask_0 = const()[name = tensor("op_1693_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1693_cast_fp16 = slice_by_index(begin = var_1693_begin_0, end = var_1693_end_0, end_mask = var_1693_end_mask_0, x = var_1659_cast_fp16)[name = tensor("op_1693_cast_fp16")]; tensor var_1694_begin_0 = const()[name = tensor("op_1694_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1694_end_0 = const()[name = tensor("op_1694_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1694_end_mask_0 = const()[name = tensor("op_1694_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1694_cast_fp16 = slice_by_index(begin = var_1694_begin_0, end = var_1694_end_0, end_mask = var_1694_end_mask_0, x = var_1659_cast_fp16)[name = tensor("op_1694_cast_fp16")]; tensor var_1695_begin_0 = const()[name = tensor("op_1695_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1695_end_0 = const()[name = tensor("op_1695_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1695_end_mask_0 = const()[name = tensor("op_1695_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1695_cast_fp16 = slice_by_index(begin = var_1695_begin_0, end = var_1695_end_0, end_mask = var_1695_end_mask_0, x = var_1659_cast_fp16)[name = tensor("op_1695_cast_fp16")]; tensor var_1696_begin_0 = const()[name = tensor("op_1696_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1696_end_0 = const()[name = tensor("op_1696_end_0"), val = tensor([1, 64, 1, 256])]; tensor var_1696_end_mask_0 = const()[name = tensor("op_1696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1696_cast_fp16 = slice_by_index(begin = var_1696_begin_0, end = var_1696_end_0, end_mask = var_1696_end_mask_0, x = var_1663_cast_fp16)[name = tensor("op_1696_cast_fp16")]; tensor var_1697_begin_0 = const()[name = tensor("op_1697_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1697_end_0 = const()[name = tensor("op_1697_end_0"), val = tensor([1, 64, 1, 512])]; tensor var_1697_end_mask_0 = const()[name = tensor("op_1697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1697_cast_fp16 = slice_by_index(begin = var_1697_begin_0, end = var_1697_end_0, end_mask = var_1697_end_mask_0, x = var_1663_cast_fp16)[name = tensor("op_1697_cast_fp16")]; tensor var_1698_begin_0 = const()[name = tensor("op_1698_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1698_end_0 = const()[name = tensor("op_1698_end_0"), val = tensor([1, 64, 1, 768])]; tensor var_1698_end_mask_0 = const()[name = tensor("op_1698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1698_cast_fp16 = slice_by_index(begin = var_1698_begin_0, end = var_1698_end_0, end_mask = var_1698_end_mask_0, x = var_1663_cast_fp16)[name = tensor("op_1698_cast_fp16")]; tensor var_1699_begin_0 = const()[name = tensor("op_1699_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1699_end_0 = const()[name = tensor("op_1699_end_0"), val = tensor([1, 64, 1, 1024])]; tensor var_1699_end_mask_0 = const()[name = tensor("op_1699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1699_cast_fp16 = slice_by_index(begin = var_1699_begin_0, end = var_1699_end_0, end_mask = var_1699_end_mask_0, x = var_1663_cast_fp16)[name = tensor("op_1699_cast_fp16")]; tensor var_1700_begin_0 = const()[name = tensor("op_1700_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1700_end_0 = const()[name = tensor("op_1700_end_0"), val = tensor([1, 64, 1, 1280])]; tensor var_1700_end_mask_0 = const()[name = tensor("op_1700_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1700_cast_fp16 = slice_by_index(begin = var_1700_begin_0, end = var_1700_end_0, end_mask = var_1700_end_mask_0, x = var_1663_cast_fp16)[name = tensor("op_1700_cast_fp16")]; tensor var_1701_begin_0 = const()[name = tensor("op_1701_begin_0"), val = tensor([0, 0, 0, 1280])]; tensor var_1701_end_0 = const()[name = tensor("op_1701_end_0"), val = tensor([1, 64, 1, 1])]; tensor var_1701_end_mask_0 = const()[name = tensor("op_1701_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1701_cast_fp16 = slice_by_index(begin = var_1701_begin_0, end = var_1701_end_0, end_mask = var_1701_end_mask_0, x = var_1663_cast_fp16)[name = tensor("op_1701_cast_fp16")]; tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1706_begin_0 = const()[name = tensor("op_1706_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1706_end_0 = const()[name = tensor("op_1706_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_1706_end_mask_0 = const()[name = tensor("op_1706_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; tensor var_1706_cast_fp16 = slice_by_index(begin = var_1706_begin_0, end = var_1706_end_0, end_mask = var_1706_end_mask_0, x = k_cast_fp16)[name = tensor("op_1706_cast_fp16")]; tensor var_1710_begin_0 = const()[name = tensor("op_1710_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_1710_end_0 = const()[name = tensor("op_1710_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_1710_end_mask_0 = const()[name = tensor("op_1710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1710_cast_fp16 = slice_by_index(begin = var_1710_begin_0, end = var_1710_end_0, end_mask = var_1710_end_mask_0, x = k_cast_fp16)[name = tensor("op_1710_cast_fp16")]; tensor var_1714_begin_0 = const()[name = tensor("op_1714_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_1714_end_0 = const()[name = tensor("op_1714_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_1714_end_mask_0 = const()[name = tensor("op_1714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1714_cast_fp16 = slice_by_index(begin = var_1714_begin_0, end = var_1714_end_0, end_mask = var_1714_end_mask_0, x = k_cast_fp16)[name = tensor("op_1714_cast_fp16")]; tensor var_1718_begin_0 = const()[name = tensor("op_1718_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_1718_end_0 = const()[name = tensor("op_1718_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_1718_end_mask_0 = const()[name = tensor("op_1718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1718_cast_fp16 = slice_by_index(begin = var_1718_begin_0, end = var_1718_end_0, end_mask = var_1718_end_mask_0, x = k_cast_fp16)[name = tensor("op_1718_cast_fp16")]; tensor var_1722_begin_0 = const()[name = tensor("op_1722_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1722_end_0 = const()[name = tensor("op_1722_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_1722_end_mask_0 = const()[name = tensor("op_1722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1722_cast_fp16 = slice_by_index(begin = var_1722_begin_0, end = var_1722_end_0, end_mask = var_1722_end_mask_0, x = k_cast_fp16)[name = tensor("op_1722_cast_fp16")]; tensor var_1726_begin_0 = const()[name = tensor("op_1726_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_1726_end_0 = const()[name = tensor("op_1726_end_0"), val = tensor([1, 1500, 1, 1])]; tensor var_1726_end_mask_0 = const()[name = tensor("op_1726_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1726_cast_fp16 = slice_by_index(begin = var_1726_begin_0, end = var_1726_end_0, end_mask = var_1726_end_mask_0, x = k_cast_fp16)[name = tensor("op_1726_cast_fp16")]; tensor var_1728_begin_0 = const()[name = tensor("op_1728_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1728_end_0 = const()[name = tensor("op_1728_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1728_end_mask_0 = const()[name = tensor("op_1728_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1728_cast_fp16 = slice_by_index(begin = var_1728_begin_0, end = var_1728_end_0, end_mask = var_1728_end_mask_0, x = value_cast_fp16)[name = tensor("op_1728_cast_fp16")]; tensor var_1732_begin_0 = const()[name = tensor("op_1732_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1732_end_0 = const()[name = tensor("op_1732_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1732_end_mask_0 = const()[name = tensor("op_1732_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1732_cast_fp16 = slice_by_index(begin = var_1732_begin_0, end = var_1732_end_0, end_mask = var_1732_end_mask_0, x = value_cast_fp16)[name = tensor("op_1732_cast_fp16")]; tensor var_1736_begin_0 = const()[name = tensor("op_1736_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1736_end_0 = const()[name = tensor("op_1736_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1736_end_mask_0 = const()[name = tensor("op_1736_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1736_cast_fp16 = slice_by_index(begin = var_1736_begin_0, end = var_1736_end_0, end_mask = var_1736_end_mask_0, x = value_cast_fp16)[name = tensor("op_1736_cast_fp16")]; tensor var_1740_begin_0 = const()[name = tensor("op_1740_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1740_end_0 = const()[name = tensor("op_1740_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1740_end_mask_0 = const()[name = tensor("op_1740_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1740_cast_fp16 = slice_by_index(begin = var_1740_begin_0, end = var_1740_end_0, end_mask = var_1740_end_mask_0, x = value_cast_fp16)[name = tensor("op_1740_cast_fp16")]; tensor var_1744_begin_0 = const()[name = tensor("op_1744_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1744_end_0 = const()[name = tensor("op_1744_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1744_end_mask_0 = const()[name = tensor("op_1744_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1744_cast_fp16 = slice_by_index(begin = var_1744_begin_0, end = var_1744_end_0, end_mask = var_1744_end_mask_0, x = value_cast_fp16)[name = tensor("op_1744_cast_fp16")]; tensor var_1748_begin_0 = const()[name = tensor("op_1748_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1748_end_0 = const()[name = tensor("op_1748_end_0"), val = tensor([1, 1, 1, 1500])]; tensor var_1748_end_mask_0 = const()[name = tensor("op_1748_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_1748_cast_fp16 = slice_by_index(begin = var_1748_begin_0, end = var_1748_end_0, end_mask = var_1748_end_mask_0, x = value_cast_fp16)[name = tensor("op_1748_cast_fp16")]; tensor _SplitHeadsQ__mh_w_217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_217_equation_0, values = (var_1706_cast_fp16, var_1666_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_219_equation_0, values = (var_1706_cast_fp16, var_1667_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_221_equation_0, values = (var_1706_cast_fp16, var_1668_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_223_equation_0, values = (var_1706_cast_fp16, var_1669_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_225_equation_0, values = (var_1706_cast_fp16, var_1670_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_227_equation_0, values = (var_1706_cast_fp16, var_1671_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_229_equation_0, values = (var_1710_cast_fp16, var_1672_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_231_equation_0, values = (var_1710_cast_fp16, var_1673_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_233_equation_0, values = (var_1710_cast_fp16, var_1674_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_235_equation_0, values = (var_1710_cast_fp16, var_1675_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_237_equation_0, values = (var_1710_cast_fp16, var_1676_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_239_equation_0, values = (var_1710_cast_fp16, var_1677_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_241_equation_0, values = (var_1714_cast_fp16, var_1678_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_243_equation_0, values = (var_1714_cast_fp16, var_1679_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_245_equation_0, values = (var_1714_cast_fp16, var_1680_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_247_equation_0, values = (var_1714_cast_fp16, var_1681_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_249_equation_0, values = (var_1714_cast_fp16, var_1682_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_251_equation_0, values = (var_1714_cast_fp16, var_1683_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_253_equation_0, values = (var_1718_cast_fp16, var_1684_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_255_equation_0, values = (var_1718_cast_fp16, var_1685_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_257_equation_0, values = (var_1718_cast_fp16, var_1686_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_259_equation_0, values = (var_1718_cast_fp16, var_1687_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_261_equation_0, values = (var_1718_cast_fp16, var_1688_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_263_equation_0, values = (var_1718_cast_fp16, var_1689_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_265_equation_0, values = (var_1722_cast_fp16, var_1690_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_267_equation_0, values = (var_1722_cast_fp16, var_1691_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_269_equation_0, values = (var_1722_cast_fp16, var_1692_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_271_equation_0, values = (var_1722_cast_fp16, var_1693_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_273_equation_0, values = (var_1722_cast_fp16, var_1694_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_275_equation_0, values = (var_1722_cast_fp16, var_1695_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_277_equation_0, values = (var_1726_cast_fp16, var_1696_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_279_equation_0, values = (var_1726_cast_fp16, var_1697_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_281_equation_0, values = (var_1726_cast_fp16, var_1698_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_283_equation_0, values = (var_1726_cast_fp16, var_1699_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_285_equation_0, values = (var_1726_cast_fp16, var_1700_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_equation_0, values = (var_1726_cast_fp16, var_1701_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_cast_fp16")]; tensor var_1823_to_fp16 = const()[name = tensor("op_1823_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_217_cast_fp16, y = var_1823_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; tensor var_1825_to_fp16 = const()[name = tensor("op_1825_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_219_cast_fp16, y = var_1825_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; tensor var_1827_to_fp16 = const()[name = tensor("op_1827_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_221_cast_fp16, y = var_1827_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; tensor var_1829_to_fp16 = const()[name = tensor("op_1829_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_223_cast_fp16, y = var_1829_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; tensor var_1831_to_fp16 = const()[name = tensor("op_1831_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_225_cast_fp16, y = var_1831_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; tensor var_1833_to_fp16 = const()[name = tensor("op_1833_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_227_cast_fp16, y = var_1833_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; tensor var_1835_to_fp16 = const()[name = tensor("op_1835_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_229_cast_fp16, y = var_1835_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; tensor var_1837_to_fp16 = const()[name = tensor("op_1837_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_231_cast_fp16, y = var_1837_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; tensor var_1839_to_fp16 = const()[name = tensor("op_1839_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_233_cast_fp16, y = var_1839_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; tensor var_1841_to_fp16 = const()[name = tensor("op_1841_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_235_cast_fp16, y = var_1841_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; tensor var_1843_to_fp16 = const()[name = tensor("op_1843_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_237_cast_fp16, y = var_1843_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; tensor var_1845_to_fp16 = const()[name = tensor("op_1845_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_239_cast_fp16, y = var_1845_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; tensor var_1847_to_fp16 = const()[name = tensor("op_1847_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_241_cast_fp16, y = var_1847_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; tensor var_1849_to_fp16 = const()[name = tensor("op_1849_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_243_cast_fp16, y = var_1849_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; tensor var_1851_to_fp16 = const()[name = tensor("op_1851_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_245_cast_fp16, y = var_1851_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; tensor var_1853_to_fp16 = const()[name = tensor("op_1853_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_247_cast_fp16, y = var_1853_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; tensor var_1855_to_fp16 = const()[name = tensor("op_1855_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_249_cast_fp16, y = var_1855_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; tensor var_1857_to_fp16 = const()[name = tensor("op_1857_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_251_cast_fp16, y = var_1857_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; tensor var_1859_to_fp16 = const()[name = tensor("op_1859_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_253_cast_fp16, y = var_1859_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; tensor var_1861_to_fp16 = const()[name = tensor("op_1861_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_255_cast_fp16, y = var_1861_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; tensor var_1863_to_fp16 = const()[name = tensor("op_1863_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_257_cast_fp16, y = var_1863_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; tensor var_1865_to_fp16 = const()[name = tensor("op_1865_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_259_cast_fp16, y = var_1865_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; tensor var_1867_to_fp16 = const()[name = tensor("op_1867_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_261_cast_fp16, y = var_1867_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; tensor var_1869_to_fp16 = const()[name = tensor("op_1869_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_263_cast_fp16, y = var_1869_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; tensor var_1871_to_fp16 = const()[name = tensor("op_1871_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_265_cast_fp16, y = var_1871_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; tensor var_1873_to_fp16 = const()[name = tensor("op_1873_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_267_cast_fp16, y = var_1873_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; tensor var_1875_to_fp16 = const()[name = tensor("op_1875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_269_cast_fp16, y = var_1875_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; tensor var_1877_to_fp16 = const()[name = tensor("op_1877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_271_cast_fp16, y = var_1877_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; tensor var_1879_to_fp16 = const()[name = tensor("op_1879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_273_cast_fp16, y = var_1879_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; tensor var_1881_to_fp16 = const()[name = tensor("op_1881_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_275_cast_fp16, y = var_1881_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; tensor var_1883_to_fp16 = const()[name = tensor("op_1883_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_277_cast_fp16, y = var_1883_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; tensor var_1885_to_fp16 = const()[name = tensor("op_1885_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_279_cast_fp16, y = var_1885_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; tensor var_1887_to_fp16 = const()[name = tensor("op_1887_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_281_cast_fp16, y = var_1887_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; tensor var_1889_to_fp16 = const()[name = tensor("op_1889_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_283_cast_fp16, y = var_1889_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; tensor var_1891_to_fp16 = const()[name = tensor("op_1891_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_285_cast_fp16, y = var_1891_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; tensor var_1893_to_fp16 = const()[name = tensor("op_1893_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_cast_fp16, y = var_1893_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; tensor var_1895_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_217_cast_fp16)[name = tensor("op_1895_cast_fp16")]; tensor var_1896_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_219_cast_fp16)[name = tensor("op_1896_cast_fp16")]; tensor var_1897_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_221_cast_fp16)[name = tensor("op_1897_cast_fp16")]; tensor var_1898_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_223_cast_fp16)[name = tensor("op_1898_cast_fp16")]; tensor var_1899_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_225_cast_fp16)[name = tensor("op_1899_cast_fp16")]; tensor var_1900_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_227_cast_fp16)[name = tensor("op_1900_cast_fp16")]; tensor var_1901_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_229_cast_fp16)[name = tensor("op_1901_cast_fp16")]; tensor var_1902_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_231_cast_fp16)[name = tensor("op_1902_cast_fp16")]; tensor var_1903_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_233_cast_fp16)[name = tensor("op_1903_cast_fp16")]; tensor var_1904_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_235_cast_fp16)[name = tensor("op_1904_cast_fp16")]; tensor var_1905_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_237_cast_fp16)[name = tensor("op_1905_cast_fp16")]; tensor var_1906_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_239_cast_fp16)[name = tensor("op_1906_cast_fp16")]; tensor var_1907_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_241_cast_fp16)[name = tensor("op_1907_cast_fp16")]; tensor var_1908_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_243_cast_fp16)[name = tensor("op_1908_cast_fp16")]; tensor var_1909_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_245_cast_fp16)[name = tensor("op_1909_cast_fp16")]; tensor var_1910_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_247_cast_fp16)[name = tensor("op_1910_cast_fp16")]; tensor var_1911_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_249_cast_fp16)[name = tensor("op_1911_cast_fp16")]; tensor var_1912_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_251_cast_fp16)[name = tensor("op_1912_cast_fp16")]; tensor var_1913_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_253_cast_fp16)[name = tensor("op_1913_cast_fp16")]; tensor var_1914_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_255_cast_fp16)[name = tensor("op_1914_cast_fp16")]; tensor var_1915_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_257_cast_fp16)[name = tensor("op_1915_cast_fp16")]; tensor var_1916_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_259_cast_fp16)[name = tensor("op_1916_cast_fp16")]; tensor var_1917_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_261_cast_fp16)[name = tensor("op_1917_cast_fp16")]; tensor var_1918_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_263_cast_fp16)[name = tensor("op_1918_cast_fp16")]; tensor var_1919_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_265_cast_fp16)[name = tensor("op_1919_cast_fp16")]; tensor var_1920_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_267_cast_fp16)[name = tensor("op_1920_cast_fp16")]; tensor var_1921_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_269_cast_fp16)[name = tensor("op_1921_cast_fp16")]; tensor var_1922_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_271_cast_fp16)[name = tensor("op_1922_cast_fp16")]; tensor var_1923_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_273_cast_fp16)[name = tensor("op_1923_cast_fp16")]; tensor var_1924_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_275_cast_fp16)[name = tensor("op_1924_cast_fp16")]; tensor var_1925_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_277_cast_fp16)[name = tensor("op_1925_cast_fp16")]; tensor var_1926_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_279_cast_fp16)[name = tensor("op_1926_cast_fp16")]; tensor var_1927_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_281_cast_fp16)[name = tensor("op_1927_cast_fp16")]; tensor var_1928_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_283_cast_fp16)[name = tensor("op_1928_cast_fp16")]; tensor var_1929_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_285_cast_fp16)[name = tensor("op_1929_cast_fp16")]; tensor var_1930_cast_fp16 = softmax(axis = var_1591, x = aw_chunk_cast_fp16)[name = tensor("op_1930_cast_fp16")]; tensor var_1932_equation_0 = const()[name = tensor("op_1932_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1932_cast_fp16 = einsum(equation = var_1932_equation_0, values = (var_1728_cast_fp16, var_1895_cast_fp16))[name = tensor("op_1932_cast_fp16")]; tensor var_1934_equation_0 = const()[name = tensor("op_1934_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1934_cast_fp16 = einsum(equation = var_1934_equation_0, values = (var_1728_cast_fp16, var_1896_cast_fp16))[name = tensor("op_1934_cast_fp16")]; tensor var_1936_equation_0 = const()[name = tensor("op_1936_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1936_cast_fp16 = einsum(equation = var_1936_equation_0, values = (var_1728_cast_fp16, var_1897_cast_fp16))[name = tensor("op_1936_cast_fp16")]; tensor var_1938_equation_0 = const()[name = tensor("op_1938_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1938_cast_fp16 = einsum(equation = var_1938_equation_0, values = (var_1728_cast_fp16, var_1898_cast_fp16))[name = tensor("op_1938_cast_fp16")]; tensor var_1940_equation_0 = const()[name = tensor("op_1940_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1940_cast_fp16 = einsum(equation = var_1940_equation_0, values = (var_1728_cast_fp16, var_1899_cast_fp16))[name = tensor("op_1940_cast_fp16")]; tensor var_1942_equation_0 = const()[name = tensor("op_1942_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1942_cast_fp16 = einsum(equation = var_1942_equation_0, values = (var_1728_cast_fp16, var_1900_cast_fp16))[name = tensor("op_1942_cast_fp16")]; tensor var_1944_equation_0 = const()[name = tensor("op_1944_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1944_cast_fp16 = einsum(equation = var_1944_equation_0, values = (var_1732_cast_fp16, var_1901_cast_fp16))[name = tensor("op_1944_cast_fp16")]; tensor var_1946_equation_0 = const()[name = tensor("op_1946_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1946_cast_fp16 = einsum(equation = var_1946_equation_0, values = (var_1732_cast_fp16, var_1902_cast_fp16))[name = tensor("op_1946_cast_fp16")]; tensor var_1948_equation_0 = const()[name = tensor("op_1948_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1948_cast_fp16 = einsum(equation = var_1948_equation_0, values = (var_1732_cast_fp16, var_1903_cast_fp16))[name = tensor("op_1948_cast_fp16")]; tensor var_1950_equation_0 = const()[name = tensor("op_1950_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1950_cast_fp16 = einsum(equation = var_1950_equation_0, values = (var_1732_cast_fp16, var_1904_cast_fp16))[name = tensor("op_1950_cast_fp16")]; tensor var_1952_equation_0 = const()[name = tensor("op_1952_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1952_cast_fp16 = einsum(equation = var_1952_equation_0, values = (var_1732_cast_fp16, var_1905_cast_fp16))[name = tensor("op_1952_cast_fp16")]; tensor var_1954_equation_0 = const()[name = tensor("op_1954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1954_cast_fp16 = einsum(equation = var_1954_equation_0, values = (var_1732_cast_fp16, var_1906_cast_fp16))[name = tensor("op_1954_cast_fp16")]; tensor var_1956_equation_0 = const()[name = tensor("op_1956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1956_cast_fp16 = einsum(equation = var_1956_equation_0, values = (var_1736_cast_fp16, var_1907_cast_fp16))[name = tensor("op_1956_cast_fp16")]; tensor var_1958_equation_0 = const()[name = tensor("op_1958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1958_cast_fp16 = einsum(equation = var_1958_equation_0, values = (var_1736_cast_fp16, var_1908_cast_fp16))[name = tensor("op_1958_cast_fp16")]; tensor var_1960_equation_0 = const()[name = tensor("op_1960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1960_cast_fp16 = einsum(equation = var_1960_equation_0, values = (var_1736_cast_fp16, var_1909_cast_fp16))[name = tensor("op_1960_cast_fp16")]; tensor var_1962_equation_0 = const()[name = tensor("op_1962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1962_cast_fp16 = einsum(equation = var_1962_equation_0, values = (var_1736_cast_fp16, var_1910_cast_fp16))[name = tensor("op_1962_cast_fp16")]; tensor var_1964_equation_0 = const()[name = tensor("op_1964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1964_cast_fp16 = einsum(equation = var_1964_equation_0, values = (var_1736_cast_fp16, var_1911_cast_fp16))[name = tensor("op_1964_cast_fp16")]; tensor var_1966_equation_0 = const()[name = tensor("op_1966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1966_cast_fp16 = einsum(equation = var_1966_equation_0, values = (var_1736_cast_fp16, var_1912_cast_fp16))[name = tensor("op_1966_cast_fp16")]; tensor var_1968_equation_0 = const()[name = tensor("op_1968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1968_cast_fp16 = einsum(equation = var_1968_equation_0, values = (var_1740_cast_fp16, var_1913_cast_fp16))[name = tensor("op_1968_cast_fp16")]; tensor var_1970_equation_0 = const()[name = tensor("op_1970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1970_cast_fp16 = einsum(equation = var_1970_equation_0, values = (var_1740_cast_fp16, var_1914_cast_fp16))[name = tensor("op_1970_cast_fp16")]; tensor var_1972_equation_0 = const()[name = tensor("op_1972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1972_cast_fp16 = einsum(equation = var_1972_equation_0, values = (var_1740_cast_fp16, var_1915_cast_fp16))[name = tensor("op_1972_cast_fp16")]; tensor var_1974_equation_0 = const()[name = tensor("op_1974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1974_cast_fp16 = einsum(equation = var_1974_equation_0, values = (var_1740_cast_fp16, var_1916_cast_fp16))[name = tensor("op_1974_cast_fp16")]; tensor var_1976_equation_0 = const()[name = tensor("op_1976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1976_cast_fp16 = einsum(equation = var_1976_equation_0, values = (var_1740_cast_fp16, var_1917_cast_fp16))[name = tensor("op_1976_cast_fp16")]; tensor var_1978_equation_0 = const()[name = tensor("op_1978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1978_cast_fp16 = einsum(equation = var_1978_equation_0, values = (var_1740_cast_fp16, var_1918_cast_fp16))[name = tensor("op_1978_cast_fp16")]; tensor var_1980_equation_0 = const()[name = tensor("op_1980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1980_cast_fp16 = einsum(equation = var_1980_equation_0, values = (var_1744_cast_fp16, var_1919_cast_fp16))[name = tensor("op_1980_cast_fp16")]; tensor var_1982_equation_0 = const()[name = tensor("op_1982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1982_cast_fp16 = einsum(equation = var_1982_equation_0, values = (var_1744_cast_fp16, var_1920_cast_fp16))[name = tensor("op_1982_cast_fp16")]; tensor var_1984_equation_0 = const()[name = tensor("op_1984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1984_cast_fp16 = einsum(equation = var_1984_equation_0, values = (var_1744_cast_fp16, var_1921_cast_fp16))[name = tensor("op_1984_cast_fp16")]; tensor var_1986_equation_0 = const()[name = tensor("op_1986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1986_cast_fp16 = einsum(equation = var_1986_equation_0, values = (var_1744_cast_fp16, var_1922_cast_fp16))[name = tensor("op_1986_cast_fp16")]; tensor var_1988_equation_0 = const()[name = tensor("op_1988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1988_cast_fp16 = einsum(equation = var_1988_equation_0, values = (var_1744_cast_fp16, var_1923_cast_fp16))[name = tensor("op_1988_cast_fp16")]; tensor var_1990_equation_0 = const()[name = tensor("op_1990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1990_cast_fp16 = einsum(equation = var_1990_equation_0, values = (var_1744_cast_fp16, var_1924_cast_fp16))[name = tensor("op_1990_cast_fp16")]; tensor var_1992_equation_0 = const()[name = tensor("op_1992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1992_cast_fp16 = einsum(equation = var_1992_equation_0, values = (var_1748_cast_fp16, var_1925_cast_fp16))[name = tensor("op_1992_cast_fp16")]; tensor var_1994_equation_0 = const()[name = tensor("op_1994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1994_cast_fp16 = einsum(equation = var_1994_equation_0, values = (var_1748_cast_fp16, var_1926_cast_fp16))[name = tensor("op_1994_cast_fp16")]; tensor var_1996_equation_0 = const()[name = tensor("op_1996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1996_cast_fp16 = einsum(equation = var_1996_equation_0, values = (var_1748_cast_fp16, var_1927_cast_fp16))[name = tensor("op_1996_cast_fp16")]; tensor var_1998_equation_0 = const()[name = tensor("op_1998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1998_cast_fp16 = einsum(equation = var_1998_equation_0, values = (var_1748_cast_fp16, var_1928_cast_fp16))[name = tensor("op_1998_cast_fp16")]; tensor var_2000_equation_0 = const()[name = tensor("op_2000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2000_cast_fp16 = einsum(equation = var_2000_equation_0, values = (var_1748_cast_fp16, var_1929_cast_fp16))[name = tensor("op_2000_cast_fp16")]; tensor var_2002_equation_0 = const()[name = tensor("op_2002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2002_cast_fp16 = einsum(equation = var_2002_equation_0, values = (var_1748_cast_fp16, var_1930_cast_fp16))[name = tensor("op_2002_cast_fp16")]; tensor var_2004_interleave_0 = const()[name = tensor("op_2004_interleave_0"), val = tensor(false)]; tensor var_2004_cast_fp16 = concat(axis = var_1579, interleave = var_2004_interleave_0, values = (var_1932_cast_fp16, var_1934_cast_fp16, var_1936_cast_fp16, var_1938_cast_fp16, var_1940_cast_fp16, var_1942_cast_fp16))[name = tensor("op_2004_cast_fp16")]; tensor var_2006_interleave_0 = const()[name = tensor("op_2006_interleave_0"), val = tensor(false)]; tensor var_2006_cast_fp16 = concat(axis = var_1579, interleave = var_2006_interleave_0, values = (var_1944_cast_fp16, var_1946_cast_fp16, var_1948_cast_fp16, var_1950_cast_fp16, var_1952_cast_fp16, var_1954_cast_fp16))[name = tensor("op_2006_cast_fp16")]; tensor var_2008_interleave_0 = const()[name = tensor("op_2008_interleave_0"), val = tensor(false)]; tensor var_2008_cast_fp16 = concat(axis = var_1579, interleave = var_2008_interleave_0, values = (var_1956_cast_fp16, var_1958_cast_fp16, var_1960_cast_fp16, var_1962_cast_fp16, var_1964_cast_fp16, var_1966_cast_fp16))[name = tensor("op_2008_cast_fp16")]; tensor var_2010_interleave_0 = const()[name = tensor("op_2010_interleave_0"), val = tensor(false)]; tensor var_2010_cast_fp16 = concat(axis = var_1579, interleave = var_2010_interleave_0, values = (var_1968_cast_fp16, var_1970_cast_fp16, var_1972_cast_fp16, var_1974_cast_fp16, var_1976_cast_fp16, var_1978_cast_fp16))[name = tensor("op_2010_cast_fp16")]; tensor var_2012_interleave_0 = const()[name = tensor("op_2012_interleave_0"), val = tensor(false)]; tensor var_2012_cast_fp16 = concat(axis = var_1579, interleave = var_2012_interleave_0, values = (var_1980_cast_fp16, var_1982_cast_fp16, var_1984_cast_fp16, var_1986_cast_fp16, var_1988_cast_fp16, var_1990_cast_fp16))[name = tensor("op_2012_cast_fp16")]; tensor var_2014_interleave_0 = const()[name = tensor("op_2014_interleave_0"), val = tensor(false)]; tensor var_2014_cast_fp16 = concat(axis = var_1579, interleave = var_2014_interleave_0, values = (var_1992_cast_fp16, var_1994_cast_fp16, var_1996_cast_fp16, var_1998_cast_fp16, var_2000_cast_fp16, var_2002_cast_fp16))[name = tensor("op_2014_cast_fp16")]; tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; tensor input_25_cast_fp16 = concat(axis = var_1591, interleave = input_25_interleave_0, values = (var_2004_cast_fp16, var_2006_cast_fp16, var_2008_cast_fp16, var_2010_cast_fp16, var_2012_cast_fp16, var_2014_cast_fp16))[name = tensor("input_25_cast_fp16")]; tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("valid")]; tensor obj_strides_0 = const()[name = tensor("obj_strides_0"), val = tensor([1, 1])]; tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; tensor obj_dilations_0 = const()[name = tensor("obj_dilations_0"), val = tensor([1, 1])]; tensor obj_groups_0 = const()[name = tensor("obj_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13760256)))]; tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14055232)))]; tensor obj_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = obj_dilations_0, groups = obj_groups_0, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = obj_strides_0, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_cast_fp16")]; tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; tensor var_2033_to_fp16 = const()[name = tensor("op_2033_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_2033_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14056064)))]; tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14056896)))]; tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("valid")]; tensor input_29_strides_0 = const()[name = tensor("input_29_strides_0"), val = tensor([1, 1])]; tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_29_dilations_0 = const()[name = tensor("input_29_dilations_0"), val = tensor([1, 1])]; tensor input_29_groups_0 = const()[name = tensor("input_29_groups_0"), val = tensor(1)]; tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14057728)))]; tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15237440)))]; tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_29_cast_fp16)[name = tensor("input_cast_fp16")]; tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("valid")]; tensor hidden_states_strides_0 = const()[name = tensor("hidden_states_strides_0"), val = tensor([1, 1])]; tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor hidden_states_dilations_0 = const()[name = tensor("hidden_states_dilations_0"), val = tensor([1, 1])]; tensor hidden_states_groups_0 = const()[name = tensor("hidden_states_groups_0"), val = tensor(1)]; tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15240576)))]; tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16420288)))]; tensor hidden_states_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = layers_3_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; tensor inputs_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; tensor var_2071_to_fp16 = const()[name = tensor("op_2071_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2071_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16421120)))]; tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16421952)))]; tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; } -> (encoder_output_embeds); }