program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor audio_embed, tensor cache_k0, tensor cache_k1, tensor cache_k10, tensor cache_k11, tensor cache_k2, tensor cache_k3, tensor cache_k4, tensor cache_k5, tensor cache_k6, tensor cache_k7, tensor cache_k8, tensor cache_k9, tensor cache_v0, tensor cache_v1, tensor cache_v10, tensor cache_v11, tensor cache_v2, tensor cache_v3, tensor cache_v4, tensor cache_v5, tensor cache_v6, tensor cache_v7, tensor cache_v8, tensor cache_v9, tensor encoder_mask, tensor encoder_output, tensor position0, tensor position1, tensor position10, tensor position11, tensor position2, tensor position3, tensor position4, tensor position5, tensor position6, tensor position7, tensor position8, tensor position9) { tensor cast_190_dtype_0 = const()[name = tensor("cast_190_dtype_0"), val = tensor("bool")]; tensor cast_190 = cast(dtype = cast_190_dtype_0, x = encoder_mask)[name = tensor("cast_190")]; tensor var_75_batch_dims_0 = const()[name = tensor("op_75_batch_dims_0"), val = tensor(0)]; tensor var_75_validate_indices_0 = const()[name = tensor("op_75_validate_indices_0"), val = tensor(false)]; tensor position_embeddings_weight_to_fp16 = const()[name = tensor("position_embeddings_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor position0_to_int32_to_int16_dtype_0 = const()[name = tensor("position0_to_int32_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_161_dtype_0 = const()[name = tensor("cast_161_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0 = const()[name = tensor("greater_equal_0_y_0"), val = tensor(0)]; tensor position0_to_int16 = cast(dtype = position0_to_int32_to_int16_dtype_0, x = position0)[name = tensor("cast_189")]; tensor cast_161 = cast(dtype = cast_161_dtype_0, x = position0_to_int16)[name = tensor("cast_188")]; tensor greater_equal_0 = greater_equal(x = cast_161, y = greater_equal_0_y_0)[name = tensor("greater_equal_0")]; tensor slice_by_index_0 = const()[name = tensor("slice_by_index_0"), val = tensor(2048)]; tensor add_0 = add(x = cast_161, y = slice_by_index_0)[name = tensor("add_0")]; tensor select_0 = select(a = cast_161, b = add_0, cond = greater_equal_0)[name = tensor("select_0")]; tensor var_75_cast_fp16_cast_uint16_axis_0 = const()[name = tensor("op_75_cast_fp16_cast_uint16_axis_0"), val = tensor(0)]; tensor select_0_to_int16_dtype_0 = const()[name = tensor("select_0_to_int16_dtype_0"), val = tensor("int16")]; tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor("cast_187")]; tensor var_75_cast_fp16_cast_uint16_cast_uint16 = gather(axis = var_75_cast_fp16_cast_uint16_axis_0, batch_dims = var_75_batch_dims_0, indices = select_0_to_int16, validate_indices = var_75_validate_indices_0, x = position_embeddings_weight_to_fp16)[name = tensor("op_75_cast_fp16_cast_uint16_cast_uint16")]; tensor var_77_axes_0 = const()[name = tensor("op_77_axes_0"), val = tensor([0])]; tensor var_77_cast_fp16 = expand_dims(axes = var_77_axes_0, x = var_75_cast_fp16_cast_uint16_cast_uint16)[name = tensor("op_77_cast_fp16")]; tensor input_3_cast_fp16 = add(x = audio_embed, y = var_77_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor var_94 = const()[name = tensor("op_94"), val = tensor(-1)]; tensor x_1_axes_0 = const()[name = tensor("x_1_axes_0"), val = tensor([-1])]; tensor layers_0_norm_sa_weight_to_fp16 = const()[name = tensor("layers_0_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3145856)))]; tensor var_97_to_fp16 = const()[name = tensor("op_97_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_sa_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("x_1_cast_fp16")]; tensor layers_0_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3147456)))]; tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6686464)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_qkv_proj_weight_to_fp16, x = x_1_cast_fp16)[name = tensor("linear_0_cast_fp16")]; tensor var_116 = const()[name = tensor("op_116"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_3_cast_fp16 = reshape(shape = var_116, x = linear_0_cast_fp16)[name = tensor("qkv_3_cast_fp16")]; tensor q_1_begin_0 = const()[name = tensor("q_1_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_1_end_0 = const()[name = tensor("q_1_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_1_end_mask_0 = const()[name = tensor("q_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_1_squeeze_mask_0 = const()[name = tensor("q_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("q_1_cast_fp16")]; tensor k_1_begin_0 = const()[name = tensor("k_1_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_1_end_0 = const()[name = tensor("k_1_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_1_end_mask_0 = const()[name = tensor("k_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_1_squeeze_mask_0 = const()[name = tensor("k_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_1_cast_fp16 = slice_by_index(begin = k_1_begin_0, end = k_1_end_0, end_mask = k_1_end_mask_0, squeeze_mask = k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("k_1_cast_fp16")]; tensor v_1_begin_0 = const()[name = tensor("v_1_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_1_end_0 = const()[name = tensor("v_1_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_1_end_mask_0 = const()[name = tensor("v_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_1_squeeze_mask_0 = const()[name = tensor("v_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_1_cast_fp16 = slice_by_index(begin = v_1_begin_0, end = v_1_end_0, end_mask = v_1_end_mask_0, squeeze_mask = v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("v_1_cast_fp16")]; tensor positions_range_1_promoted_to_fp16 = const()[name = tensor("positions_range_1_promoted_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6691136)))]; tensor var_128_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position0)[name = tensor("op_128_cast_fp16")]; tensor var_130 = const()[name = tensor("op_130"), val = tensor([1, 512, 1, 1])]; tensor var_129_to_fp16_dtype_0 = const()[name = tensor("op_129_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_128_cast_fp16_to_fp16 = cast(dtype = var_129_to_fp16_dtype_0, x = var_128_cast_fp16)[name = tensor("cast_186")]; tensor mask_1_cast_fp16 = reshape(shape = var_130, x = var_128_cast_fp16_to_fp16)[name = tensor("mask_1_cast_fp16")]; tensor k_new_1_reps_0 = const()[name = tensor("k_new_1_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_1_cast_fp16 = tile(reps = k_new_1_reps_0, x = k_1_cast_fp16)[name = tensor("k_new_1_cast_fp16")]; tensor v_new_1_reps_0 = const()[name = tensor("v_new_1_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_1_cast_fp16 = tile(reps = v_new_1_reps_0, x = v_1_cast_fp16)[name = tensor("v_new_1_cast_fp16")]; tensor var_92_to_fp16 = const()[name = tensor("op_92_to_fp16"), val = tensor(0x1p+0)]; tensor var_136_cast_fp16 = sub(x = var_92_to_fp16, y = mask_1_cast_fp16)[name = tensor("op_136_cast_fp16")]; tensor var_137_cast_fp16 = mul(x = cache_k0, y = var_136_cast_fp16)[name = tensor("op_137_cast_fp16")]; tensor var_138_cast_fp16 = mul(x = k_new_1_cast_fp16, y = mask_1_cast_fp16)[name = tensor("op_138_cast_fp16")]; tensor new_k_1 = add(x = var_137_cast_fp16, y = var_138_cast_fp16)[name = tensor("new_k_1_cast_fp16")]; tensor var_141_cast_fp16 = mul(x = cache_v0, y = var_136_cast_fp16)[name = tensor("op_141_cast_fp16")]; tensor var_142_cast_fp16 = mul(x = v_new_1_cast_fp16, y = mask_1_cast_fp16)[name = tensor("op_142_cast_fp16")]; tensor new_v_1 = add(x = var_141_cast_fp16, y = var_142_cast_fp16)[name = tensor("new_v_1_cast_fp16")]; tensor var_144_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position0)[name = tensor("op_144_cast_fp16")]; tensor var_146 = const()[name = tensor("op_146"), val = tensor([1, 1, 1, 512])]; tensor var_145_to_fp16_dtype_0 = const()[name = tensor("op_145_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_144_cast_fp16_to_fp16 = cast(dtype = var_145_to_fp16_dtype_0, x = var_144_cast_fp16)[name = tensor("cast_185")]; tensor var_147_cast_fp16 = reshape(shape = var_146, x = var_144_cast_fp16_to_fp16)[name = tensor("op_147_cast_fp16")]; tensor var_151 = const()[name = tensor("op_151"), val = tensor([0, 2, 1, 3])]; tensor var_154_transpose_x_0 = const()[name = tensor("op_154_transpose_x_0"), val = tensor(false)]; tensor var_154_transpose_y_0 = const()[name = tensor("op_154_transpose_y_0"), val = tensor(false)]; tensor transpose_72_perm_0 = const()[name = tensor("transpose_72_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_73_perm_0 = const()[name = tensor("transpose_73_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_73 = transpose(perm = transpose_73_perm_0, x = new_k_1)[name = tensor("transpose_237")]; tensor transpose_72 = transpose(perm = transpose_72_perm_0, x = q_1_cast_fp16)[name = tensor("transpose_238")]; tensor var_154_cast_fp16 = matmul(transpose_x = var_154_transpose_x_0, transpose_y = var_154_transpose_y_0, x = transpose_72, y = transpose_73)[name = tensor("op_154_cast_fp16")]; tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(0x1p-3)]; tensor attn_1_cast_fp16 = mul(x = var_154_cast_fp16, y = var_155_to_fp16)[name = tensor("attn_1_cast_fp16")]; tensor var_157_cast_fp16 = sub(x = var_92_to_fp16, y = var_147_cast_fp16)[name = tensor("op_157_cast_fp16")]; tensor var_158_to_fp16 = const()[name = tensor("op_158_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_159_cast_fp16 = mul(x = var_157_cast_fp16, y = var_158_to_fp16)[name = tensor("op_159_cast_fp16")]; tensor input_5_cast_fp16 = add(x = attn_1_cast_fp16, y = var_159_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor attn_3_cast_fp16 = softmax(axis = var_94, x = input_5_cast_fp16)[name = tensor("attn_3_cast_fp16")]; tensor out_1_transpose_x_0 = const()[name = tensor("out_1_transpose_x_0"), val = tensor(false)]; tensor out_1_transpose_y_0 = const()[name = tensor("out_1_transpose_y_0"), val = tensor(false)]; tensor v4_1_cast_fp16 = transpose(perm = var_151, x = new_v_1)[name = tensor("transpose_239")]; tensor out_1_cast_fp16 = matmul(transpose_x = out_1_transpose_x_0, transpose_y = out_1_transpose_y_0, x = attn_3_cast_fp16, y = v4_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; tensor var_163_perm_0 = const()[name = tensor("op_163_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_164 = const()[name = tensor("op_164"), val = tensor([1, 1, -1])]; tensor var_163_cast_fp16 = transpose(perm = var_163_perm_0, x = out_1_cast_fp16)[name = tensor("transpose_236")]; tensor input_7_cast_fp16 = reshape(shape = var_164, x = var_163_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6692224)))]; tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7871936)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor var_168_to_fp16 = const()[name = tensor("op_168_to_fp16"), val = tensor(0x1p+0)]; tensor var_169 = add(x = position0, y = var_168_to_fp16)[name = tensor("op_169_cast_fp16")]; tensor input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor x_3_axes_0 = const()[name = tensor("x_3_axes_0"), val = tensor([-1])]; tensor layers_0_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_0_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7873536)))]; tensor x_3_cast_fp16 = layer_norm(axes = x_3_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_xa_query_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("x_3_cast_fp16")]; tensor memory_1_axes_0 = const()[name = tensor("memory_1_axes_0"), val = tensor([-1])]; tensor layers_0_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_0_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7875136)))]; tensor memory_1_cast_fp16 = layer_norm(axes = memory_1_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_1_cast_fp16")]; tensor layers_0_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7876736)))]; tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8073408)))]; tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_0_cross_attn_q_proj_weight_to_fp16, x = x_3_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_190 = const()[name = tensor("op_190"), val = tensor([1, 1, 1, 128])]; tensor var_191_cast_fp16 = reshape(shape = var_190, x = linear_2_cast_fp16)[name = tensor("op_191_cast_fp16")]; tensor layers_0_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_0_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8073728)))]; tensor linear_3_bias_0_to_fp16 = const()[name = tensor("linear_3_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8467008)))]; tensor linear_3_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_0_cross_attn_kv_proj_weight_to_fp16, x = memory_1_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_1_cast_fp16 = reshape(shape = var_195, x = linear_3_cast_fp16)[name = tensor("kv_1_cast_fp16")]; tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_199_squeeze_mask_0 = const()[name = tensor("op_199_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, squeeze_mask = var_199_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor("op_199_cast_fp16")]; tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_203_squeeze_mask_0 = const()[name = tensor("op_203_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, squeeze_mask = var_203_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor("op_203_cast_fp16")]; tensor v_3_perm_0 = const()[name = tensor("v_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_206_transpose_x_0 = const()[name = tensor("op_206_transpose_x_0"), val = tensor(false)]; tensor var_206_transpose_y_0 = const()[name = tensor("op_206_transpose_y_0"), val = tensor(false)]; tensor transpose_74_perm_0 = const()[name = tensor("transpose_74_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_75_perm_0 = const()[name = tensor("transpose_75_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_75 = transpose(perm = transpose_75_perm_0, x = var_199_cast_fp16)[name = tensor("transpose_233")]; tensor transpose_74 = transpose(perm = transpose_74_perm_0, x = var_191_cast_fp16)[name = tensor("transpose_234")]; tensor var_206_cast_fp16 = matmul(transpose_x = var_206_transpose_x_0, transpose_y = var_206_transpose_y_0, x = transpose_74, y = transpose_75)[name = tensor("op_206_cast_fp16")]; tensor var_207_to_fp16 = const()[name = tensor("op_207_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_5_cast_fp16 = mul(x = var_206_cast_fp16, y = var_207_to_fp16)[name = tensor("attn_5_cast_fp16")]; tensor var_210_axes_0 = const()[name = tensor("op_210_axes_0"), val = tensor([1])]; tensor var_209_to_fp16_dtype_0 = const()[name = tensor("op_209_to_fp16_dtype_0"), val = tensor("fp16")]; tensor encoder_mask_to_fp16 = cast(dtype = var_209_to_fp16_dtype_0, x = cast_190)[name = tensor("cast_184")]; tensor var_210_cast_fp16 = expand_dims(axes = var_210_axes_0, x = encoder_mask_to_fp16)[name = tensor("op_210_cast_fp16")]; tensor var_211_axes_0 = const()[name = tensor("op_211_axes_0"), val = tensor([2])]; tensor var_211_cast_fp16 = expand_dims(axes = var_211_axes_0, x = var_210_cast_fp16)[name = tensor("op_211_cast_fp16")]; tensor var_212_cast_fp16 = sub(x = var_92_to_fp16, y = var_211_cast_fp16)[name = tensor("op_212_cast_fp16")]; tensor var_213_to_fp16 = const()[name = tensor("op_213_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_214_cast_fp16 = mul(x = var_212_cast_fp16, y = var_213_to_fp16)[name = tensor("op_214_cast_fp16")]; tensor input_11_cast_fp16 = add(x = attn_5_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor attn_7_cast_fp16 = softmax(axis = var_94, x = input_11_cast_fp16)[name = tensor("attn_7_cast_fp16")]; tensor out_3_transpose_x_0 = const()[name = tensor("out_3_transpose_x_0"), val = tensor(false)]; tensor out_3_transpose_y_0 = const()[name = tensor("out_3_transpose_y_0"), val = tensor(false)]; tensor v_3_cast_fp16 = transpose(perm = v_3_perm_0, x = var_203_cast_fp16)[name = tensor("transpose_235")]; tensor out_3_cast_fp16 = matmul(transpose_x = out_3_transpose_x_0, transpose_y = out_3_transpose_y_0, x = attn_7_cast_fp16, y = v_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; tensor var_218_perm_0 = const()[name = tensor("op_218_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_219 = const()[name = tensor("op_219"), val = tensor([1, 1, -1])]; tensor var_218_cast_fp16 = transpose(perm = var_218_perm_0, x = out_3_cast_fp16)[name = tensor("transpose_232")]; tensor input_13_cast_fp16 = reshape(shape = var_219, x = var_218_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor layers_0_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8467584)))]; tensor linear_4_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_cross_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_4_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor x_5_axes_0 = const()[name = tensor("x_5_axes_0"), val = tensor([-1])]; tensor layers_0_norm_ff_weight_to_fp16 = const()[name = tensor("layers_0_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8664256)))]; tensor x_5_cast_fp16 = layer_norm(axes = x_5_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_ff_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_5_cast_fp16")]; tensor input_17_perm_0 = const()[name = tensor("input_17_perm_0"), val = tensor([0, 2, 1])]; tensor input_19_pad_type_0 = const()[name = tensor("input_19_pad_type_0"), val = tensor("valid")]; tensor input_19_strides_0 = const()[name = tensor("input_19_strides_0"), val = tensor([1])]; tensor input_19_pad_0 = const()[name = tensor("input_19_pad_0"), val = tensor([0, 0])]; tensor input_19_dilations_0 = const()[name = tensor("input_19_dilations_0"), val = tensor([1])]; tensor input_19_groups_0 = const()[name = tensor("input_19_groups_0"), val = tensor(1)]; tensor layers_0_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_0_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8665856)))]; tensor input_17_cast_fp16 = transpose(perm = input_17_perm_0, x = x_5_cast_fp16)[name = tensor("transpose_231")]; tensor input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = layers_0_ffn_conv1_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_mode_0 = const()[name = tensor("input_21_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor x_7_pad_type_0 = const()[name = tensor("x_7_pad_type_0"), val = tensor("valid")]; tensor x_7_strides_0 = const()[name = tensor("x_7_strides_0"), val = tensor([1])]; tensor x_7_pad_0 = const()[name = tensor("x_7_pad_0"), val = tensor([0, 0])]; tensor x_7_dilations_0 = const()[name = tensor("x_7_dilations_0"), val = tensor([1])]; tensor x_7_groups_0 = const()[name = tensor("x_7_groups_0"), val = tensor(1)]; tensor layers_0_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_0_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13384512)))]; tensor x_7_cast_fp16 = conv(dilations = x_7_dilations_0, groups = x_7_groups_0, pad = x_7_pad_0, pad_type = x_7_pad_type_0, strides = x_7_strides_0, weight = layers_0_ffn_conv2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("x_7_cast_fp16")]; tensor x_9_perm_0 = const()[name = tensor("x_9_perm_0"), val = tensor([0, 2, 1])]; tensor x_9_cast_fp16 = transpose(perm = x_9_perm_0, x = x_7_cast_fp16)[name = tensor("transpose_230")]; tensor input_23_cast_fp16 = add(x = input_15_cast_fp16, y = x_9_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor var_264 = const()[name = tensor("op_264"), val = tensor(-1)]; tensor x_11_axes_0 = const()[name = tensor("x_11_axes_0"), val = tensor([-1])]; tensor layers_1_norm_sa_weight_to_fp16 = const()[name = tensor("layers_1_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18103168)))]; tensor var_267_to_fp16 = const()[name = tensor("op_267_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_11_cast_fp16 = layer_norm(axes = x_11_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_sa_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("x_11_cast_fp16")]; tensor layers_1_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18104768)))]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_qkv_proj_weight_to_fp16, x = x_11_cast_fp16)[name = tensor("linear_5_cast_fp16")]; tensor var_286 = const()[name = tensor("op_286"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_7_cast_fp16 = reshape(shape = var_286, x = linear_5_cast_fp16)[name = tensor("qkv_7_cast_fp16")]; tensor q_5_begin_0 = const()[name = tensor("q_5_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_5_end_0 = const()[name = tensor("q_5_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_5_end_mask_0 = const()[name = tensor("q_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_5_squeeze_mask_0 = const()[name = tensor("q_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor k_5_begin_0 = const()[name = tensor("k_5_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_5_end_0 = const()[name = tensor("k_5_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_5_end_mask_0 = const()[name = tensor("k_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_5_squeeze_mask_0 = const()[name = tensor("k_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_5_cast_fp16 = slice_by_index(begin = k_5_begin_0, end = k_5_end_0, end_mask = k_5_end_mask_0, squeeze_mask = k_5_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("k_5_cast_fp16")]; tensor v_5_begin_0 = const()[name = tensor("v_5_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_5_end_0 = const()[name = tensor("v_5_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_5_end_mask_0 = const()[name = tensor("v_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_5_squeeze_mask_0 = const()[name = tensor("v_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_5_cast_fp16 = slice_by_index(begin = v_5_begin_0, end = v_5_end_0, end_mask = v_5_end_mask_0, squeeze_mask = v_5_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("v_5_cast_fp16")]; tensor var_298_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position1)[name = tensor("op_298_cast_fp16")]; tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 512, 1, 1])]; tensor var_299_to_fp16_dtype_0 = const()[name = tensor("op_299_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_298_cast_fp16_to_fp16 = cast(dtype = var_299_to_fp16_dtype_0, x = var_298_cast_fp16)[name = tensor("cast_183")]; tensor mask_3_cast_fp16 = reshape(shape = var_300, x = var_298_cast_fp16_to_fp16)[name = tensor("mask_3_cast_fp16")]; tensor k_new_3_reps_0 = const()[name = tensor("k_new_3_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_3_cast_fp16 = tile(reps = k_new_3_reps_0, x = k_5_cast_fp16)[name = tensor("k_new_3_cast_fp16")]; tensor v_new_3_reps_0 = const()[name = tensor("v_new_3_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_3_cast_fp16 = tile(reps = v_new_3_reps_0, x = v_5_cast_fp16)[name = tensor("v_new_3_cast_fp16")]; tensor var_262_to_fp16 = const()[name = tensor("op_262_to_fp16"), val = tensor(0x1p+0)]; tensor var_306_cast_fp16 = sub(x = var_262_to_fp16, y = mask_3_cast_fp16)[name = tensor("op_306_cast_fp16")]; tensor var_307_cast_fp16 = mul(x = cache_k1, y = var_306_cast_fp16)[name = tensor("op_307_cast_fp16")]; tensor var_308_cast_fp16 = mul(x = k_new_3_cast_fp16, y = mask_3_cast_fp16)[name = tensor("op_308_cast_fp16")]; tensor new_k_3 = add(x = var_307_cast_fp16, y = var_308_cast_fp16)[name = tensor("new_k_3_cast_fp16")]; tensor var_311_cast_fp16 = mul(x = cache_v1, y = var_306_cast_fp16)[name = tensor("op_311_cast_fp16")]; tensor var_312_cast_fp16 = mul(x = v_new_3_cast_fp16, y = mask_3_cast_fp16)[name = tensor("op_312_cast_fp16")]; tensor new_v_3 = add(x = var_311_cast_fp16, y = var_312_cast_fp16)[name = tensor("new_v_3_cast_fp16")]; tensor var_314_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position1)[name = tensor("op_314_cast_fp16")]; tensor var_316 = const()[name = tensor("op_316"), val = tensor([1, 1, 1, 512])]; tensor var_315_to_fp16_dtype_0 = const()[name = tensor("op_315_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_314_cast_fp16_to_fp16 = cast(dtype = var_315_to_fp16_dtype_0, x = var_314_cast_fp16)[name = tensor("cast_182")]; tensor var_317_cast_fp16 = reshape(shape = var_316, x = var_314_cast_fp16_to_fp16)[name = tensor("op_317_cast_fp16")]; tensor var_321 = const()[name = tensor("op_321"), val = tensor([0, 2, 1, 3])]; tensor var_324_transpose_x_0 = const()[name = tensor("op_324_transpose_x_0"), val = tensor(false)]; tensor var_324_transpose_y_0 = const()[name = tensor("op_324_transpose_y_0"), val = tensor(false)]; tensor transpose_76_perm_0 = const()[name = tensor("transpose_76_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_77_perm_0 = const()[name = tensor("transpose_77_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_77 = transpose(perm = transpose_77_perm_0, x = new_k_3)[name = tensor("transpose_227")]; tensor transpose_76 = transpose(perm = transpose_76_perm_0, x = q_5_cast_fp16)[name = tensor("transpose_228")]; tensor var_324_cast_fp16 = matmul(transpose_x = var_324_transpose_x_0, transpose_y = var_324_transpose_y_0, x = transpose_76, y = transpose_77)[name = tensor("op_324_cast_fp16")]; tensor var_325_to_fp16 = const()[name = tensor("op_325_to_fp16"), val = tensor(0x1p-3)]; tensor attn_9_cast_fp16 = mul(x = var_324_cast_fp16, y = var_325_to_fp16)[name = tensor("attn_9_cast_fp16")]; tensor var_327_cast_fp16 = sub(x = var_262_to_fp16, y = var_317_cast_fp16)[name = tensor("op_327_cast_fp16")]; tensor var_328_to_fp16 = const()[name = tensor("op_328_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_329_cast_fp16 = mul(x = var_327_cast_fp16, y = var_328_to_fp16)[name = tensor("op_329_cast_fp16")]; tensor input_25_cast_fp16 = add(x = attn_9_cast_fp16, y = var_329_cast_fp16)[name = tensor("input_25_cast_fp16")]; tensor attn_11_cast_fp16 = softmax(axis = var_264, x = input_25_cast_fp16)[name = tensor("attn_11_cast_fp16")]; tensor out_5_transpose_x_0 = const()[name = tensor("out_5_transpose_x_0"), val = tensor(false)]; tensor out_5_transpose_y_0 = const()[name = tensor("out_5_transpose_y_0"), val = tensor(false)]; tensor v4_3_cast_fp16 = transpose(perm = var_321, x = new_v_3)[name = tensor("transpose_229")]; tensor out_5_cast_fp16 = matmul(transpose_x = out_5_transpose_x_0, transpose_y = out_5_transpose_y_0, x = attn_11_cast_fp16, y = v4_3_cast_fp16)[name = tensor("out_5_cast_fp16")]; tensor var_333_perm_0 = const()[name = tensor("op_333_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_334 = const()[name = tensor("op_334"), val = tensor([1, 1, -1])]; tensor var_333_cast_fp16 = transpose(perm = var_333_perm_0, x = out_5_cast_fp16)[name = tensor("transpose_226")]; tensor input_27_cast_fp16 = reshape(shape = var_334, x = var_333_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21643776)))]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("linear_6_cast_fp16")]; tensor var_338_to_fp16 = const()[name = tensor("op_338_to_fp16"), val = tensor(0x1p+0)]; tensor var_339 = add(x = position1, y = var_338_to_fp16)[name = tensor("op_339_cast_fp16")]; tensor input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_6_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor x_13_axes_0 = const()[name = tensor("x_13_axes_0"), val = tensor([-1])]; tensor layers_1_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_1_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22823488)))]; tensor x_13_cast_fp16 = layer_norm(axes = x_13_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_xa_query_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("x_13_cast_fp16")]; tensor memory_3_axes_0 = const()[name = tensor("memory_3_axes_0"), val = tensor([-1])]; tensor layers_1_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_1_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22825088)))]; tensor memory_3_cast_fp16 = layer_norm(axes = memory_3_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_3_cast_fp16")]; tensor layers_1_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22826688)))]; tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_1_cross_attn_q_proj_weight_to_fp16, x = x_13_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor var_360 = const()[name = tensor("op_360"), val = tensor([1, 1, 1, 128])]; tensor var_361_cast_fp16 = reshape(shape = var_360, x = linear_7_cast_fp16)[name = tensor("op_361_cast_fp16")]; tensor layers_1_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_1_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23023360)))]; tensor linear_8_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_1_cross_attn_kv_proj_weight_to_fp16, x = memory_3_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor var_365 = const()[name = tensor("op_365"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_3_cast_fp16 = reshape(shape = var_365, x = linear_8_cast_fp16)[name = tensor("kv_3_cast_fp16")]; tensor var_369_begin_0 = const()[name = tensor("op_369_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_369_end_0 = const()[name = tensor("op_369_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_369_end_mask_0 = const()[name = tensor("op_369_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_369_squeeze_mask_0 = const()[name = tensor("op_369_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_369_cast_fp16 = slice_by_index(begin = var_369_begin_0, end = var_369_end_0, end_mask = var_369_end_mask_0, squeeze_mask = var_369_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor("op_369_cast_fp16")]; tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_373_squeeze_mask_0 = const()[name = tensor("op_373_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, squeeze_mask = var_373_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor("op_373_cast_fp16")]; tensor v_7_perm_0 = const()[name = tensor("v_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_376_transpose_x_0 = const()[name = tensor("op_376_transpose_x_0"), val = tensor(false)]; tensor var_376_transpose_y_0 = const()[name = tensor("op_376_transpose_y_0"), val = tensor(false)]; tensor transpose_78_perm_0 = const()[name = tensor("transpose_78_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_79_perm_0 = const()[name = tensor("transpose_79_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_79 = transpose(perm = transpose_79_perm_0, x = var_369_cast_fp16)[name = tensor("transpose_223")]; tensor transpose_78 = transpose(perm = transpose_78_perm_0, x = var_361_cast_fp16)[name = tensor("transpose_224")]; tensor var_376_cast_fp16 = matmul(transpose_x = var_376_transpose_x_0, transpose_y = var_376_transpose_y_0, x = transpose_78, y = transpose_79)[name = tensor("op_376_cast_fp16")]; tensor var_377_to_fp16 = const()[name = tensor("op_377_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_13_cast_fp16 = mul(x = var_376_cast_fp16, y = var_377_to_fp16)[name = tensor("attn_13_cast_fp16")]; tensor input_31_cast_fp16 = add(x = attn_13_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor attn_15_cast_fp16 = softmax(axis = var_264, x = input_31_cast_fp16)[name = tensor("attn_15_cast_fp16")]; tensor out_7_transpose_x_0 = const()[name = tensor("out_7_transpose_x_0"), val = tensor(false)]; tensor out_7_transpose_y_0 = const()[name = tensor("out_7_transpose_y_0"), val = tensor(false)]; tensor v_7_cast_fp16 = transpose(perm = v_7_perm_0, x = var_373_cast_fp16)[name = tensor("transpose_225")]; tensor out_7_cast_fp16 = matmul(transpose_x = out_7_transpose_x_0, transpose_y = out_7_transpose_y_0, x = attn_15_cast_fp16, y = v_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; tensor var_388_perm_0 = const()[name = tensor("op_388_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_389 = const()[name = tensor("op_389"), val = tensor([1, 1, -1])]; tensor var_388_cast_fp16 = transpose(perm = var_388_perm_0, x = out_7_cast_fp16)[name = tensor("transpose_222")]; tensor input_33_cast_fp16 = reshape(shape = var_389, x = var_388_cast_fp16)[name = tensor("input_33_cast_fp16")]; tensor layers_1_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23416640)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_cross_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor input_35_cast_fp16 = add(x = input_29_cast_fp16, y = linear_9_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor x_15_axes_0 = const()[name = tensor("x_15_axes_0"), val = tensor([-1])]; tensor layers_1_norm_ff_weight_to_fp16 = const()[name = tensor("layers_1_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23613312)))]; tensor x_15_cast_fp16 = layer_norm(axes = x_15_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_ff_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("x_15_cast_fp16")]; tensor input_37_perm_0 = const()[name = tensor("input_37_perm_0"), val = tensor([0, 2, 1])]; tensor input_39_pad_type_0 = const()[name = tensor("input_39_pad_type_0"), val = tensor("valid")]; tensor input_39_strides_0 = const()[name = tensor("input_39_strides_0"), val = tensor([1])]; tensor input_39_pad_0 = const()[name = tensor("input_39_pad_0"), val = tensor([0, 0])]; tensor input_39_dilations_0 = const()[name = tensor("input_39_dilations_0"), val = tensor([1])]; tensor input_39_groups_0 = const()[name = tensor("input_39_groups_0"), val = tensor(1)]; tensor layers_1_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_1_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23614912)))]; tensor input_37_cast_fp16 = transpose(perm = input_37_perm_0, x = x_15_cast_fp16)[name = tensor("transpose_221")]; tensor input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = layers_1_ffn_conv1_weight_to_fp16, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor input_41_mode_0 = const()[name = tensor("input_41_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor("input_41_cast_fp16")]; tensor x_17_pad_type_0 = const()[name = tensor("x_17_pad_type_0"), val = tensor("valid")]; tensor x_17_strides_0 = const()[name = tensor("x_17_strides_0"), val = tensor([1])]; tensor x_17_pad_0 = const()[name = tensor("x_17_pad_0"), val = tensor([0, 0])]; tensor x_17_dilations_0 = const()[name = tensor("x_17_dilations_0"), val = tensor([1])]; tensor x_17_groups_0 = const()[name = tensor("x_17_groups_0"), val = tensor(1)]; tensor layers_1_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_1_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28333568)))]; tensor x_17_cast_fp16 = conv(dilations = x_17_dilations_0, groups = x_17_groups_0, pad = x_17_pad_0, pad_type = x_17_pad_type_0, strides = x_17_strides_0, weight = layers_1_ffn_conv2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("x_17_cast_fp16")]; tensor x_19_perm_0 = const()[name = tensor("x_19_perm_0"), val = tensor([0, 2, 1])]; tensor x_19_cast_fp16 = transpose(perm = x_19_perm_0, x = x_17_cast_fp16)[name = tensor("transpose_220")]; tensor input_43_cast_fp16 = add(x = input_35_cast_fp16, y = x_19_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor var_434 = const()[name = tensor("op_434"), val = tensor(-1)]; tensor x_21_axes_0 = const()[name = tensor("x_21_axes_0"), val = tensor([-1])]; tensor layers_2_norm_sa_weight_to_fp16 = const()[name = tensor("layers_2_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33052224)))]; tensor var_437_to_fp16 = const()[name = tensor("op_437_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_21_cast_fp16 = layer_norm(axes = x_21_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_sa_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("x_21_cast_fp16")]; tensor layers_2_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33053824)))]; tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_qkv_proj_weight_to_fp16, x = x_21_cast_fp16)[name = tensor("linear_10_cast_fp16")]; tensor var_456 = const()[name = tensor("op_456"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_11_cast_fp16 = reshape(shape = var_456, x = linear_10_cast_fp16)[name = tensor("qkv_11_cast_fp16")]; tensor q_9_begin_0 = const()[name = tensor("q_9_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_9_end_0 = const()[name = tensor("q_9_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_9_end_mask_0 = const()[name = tensor("q_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_9_squeeze_mask_0 = const()[name = tensor("q_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; tensor k_9_begin_0 = const()[name = tensor("k_9_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_9_end_0 = const()[name = tensor("k_9_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_9_end_mask_0 = const()[name = tensor("k_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_9_squeeze_mask_0 = const()[name = tensor("k_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_9_cast_fp16 = slice_by_index(begin = k_9_begin_0, end = k_9_end_0, end_mask = k_9_end_mask_0, squeeze_mask = k_9_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("k_9_cast_fp16")]; tensor v_9_begin_0 = const()[name = tensor("v_9_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_9_end_0 = const()[name = tensor("v_9_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_9_end_mask_0 = const()[name = tensor("v_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_9_squeeze_mask_0 = const()[name = tensor("v_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_9_cast_fp16 = slice_by_index(begin = v_9_begin_0, end = v_9_end_0, end_mask = v_9_end_mask_0, squeeze_mask = v_9_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor var_468_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position2)[name = tensor("op_468_cast_fp16")]; tensor var_470 = const()[name = tensor("op_470"), val = tensor([1, 512, 1, 1])]; tensor var_469_to_fp16_dtype_0 = const()[name = tensor("op_469_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_468_cast_fp16_to_fp16 = cast(dtype = var_469_to_fp16_dtype_0, x = var_468_cast_fp16)[name = tensor("cast_181")]; tensor mask_5_cast_fp16 = reshape(shape = var_470, x = var_468_cast_fp16_to_fp16)[name = tensor("mask_5_cast_fp16")]; tensor k_new_5_reps_0 = const()[name = tensor("k_new_5_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_5_cast_fp16 = tile(reps = k_new_5_reps_0, x = k_9_cast_fp16)[name = tensor("k_new_5_cast_fp16")]; tensor v_new_5_reps_0 = const()[name = tensor("v_new_5_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_5_cast_fp16 = tile(reps = v_new_5_reps_0, x = v_9_cast_fp16)[name = tensor("v_new_5_cast_fp16")]; tensor var_432_to_fp16 = const()[name = tensor("op_432_to_fp16"), val = tensor(0x1p+0)]; tensor var_476_cast_fp16 = sub(x = var_432_to_fp16, y = mask_5_cast_fp16)[name = tensor("op_476_cast_fp16")]; tensor var_477_cast_fp16 = mul(x = cache_k2, y = var_476_cast_fp16)[name = tensor("op_477_cast_fp16")]; tensor var_478_cast_fp16 = mul(x = k_new_5_cast_fp16, y = mask_5_cast_fp16)[name = tensor("op_478_cast_fp16")]; tensor new_k_5 = add(x = var_477_cast_fp16, y = var_478_cast_fp16)[name = tensor("new_k_5_cast_fp16")]; tensor var_481_cast_fp16 = mul(x = cache_v2, y = var_476_cast_fp16)[name = tensor("op_481_cast_fp16")]; tensor var_482_cast_fp16 = mul(x = v_new_5_cast_fp16, y = mask_5_cast_fp16)[name = tensor("op_482_cast_fp16")]; tensor new_v_5 = add(x = var_481_cast_fp16, y = var_482_cast_fp16)[name = tensor("new_v_5_cast_fp16")]; tensor var_484_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position2)[name = tensor("op_484_cast_fp16")]; tensor var_486 = const()[name = tensor("op_486"), val = tensor([1, 1, 1, 512])]; tensor var_485_to_fp16_dtype_0 = const()[name = tensor("op_485_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_484_cast_fp16_to_fp16 = cast(dtype = var_485_to_fp16_dtype_0, x = var_484_cast_fp16)[name = tensor("cast_180")]; tensor var_487_cast_fp16 = reshape(shape = var_486, x = var_484_cast_fp16_to_fp16)[name = tensor("op_487_cast_fp16")]; tensor var_491 = const()[name = tensor("op_491"), val = tensor([0, 2, 1, 3])]; tensor var_494_transpose_x_0 = const()[name = tensor("op_494_transpose_x_0"), val = tensor(false)]; tensor var_494_transpose_y_0 = const()[name = tensor("op_494_transpose_y_0"), val = tensor(false)]; tensor transpose_80_perm_0 = const()[name = tensor("transpose_80_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_81_perm_0 = const()[name = tensor("transpose_81_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_81 = transpose(perm = transpose_81_perm_0, x = new_k_5)[name = tensor("transpose_217")]; tensor transpose_80 = transpose(perm = transpose_80_perm_0, x = q_9_cast_fp16)[name = tensor("transpose_218")]; tensor var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_0, transpose_y = var_494_transpose_y_0, x = transpose_80, y = transpose_81)[name = tensor("op_494_cast_fp16")]; tensor var_495_to_fp16 = const()[name = tensor("op_495_to_fp16"), val = tensor(0x1p-3)]; tensor attn_17_cast_fp16 = mul(x = var_494_cast_fp16, y = var_495_to_fp16)[name = tensor("attn_17_cast_fp16")]; tensor var_497_cast_fp16 = sub(x = var_432_to_fp16, y = var_487_cast_fp16)[name = tensor("op_497_cast_fp16")]; tensor var_498_to_fp16 = const()[name = tensor("op_498_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_499_cast_fp16 = mul(x = var_497_cast_fp16, y = var_498_to_fp16)[name = tensor("op_499_cast_fp16")]; tensor input_45_cast_fp16 = add(x = attn_17_cast_fp16, y = var_499_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor attn_19_cast_fp16 = softmax(axis = var_434, x = input_45_cast_fp16)[name = tensor("attn_19_cast_fp16")]; tensor out_9_transpose_x_0 = const()[name = tensor("out_9_transpose_x_0"), val = tensor(false)]; tensor out_9_transpose_y_0 = const()[name = tensor("out_9_transpose_y_0"), val = tensor(false)]; tensor v4_5_cast_fp16 = transpose(perm = var_491, x = new_v_5)[name = tensor("transpose_219")]; tensor out_9_cast_fp16 = matmul(transpose_x = out_9_transpose_x_0, transpose_y = out_9_transpose_y_0, x = attn_19_cast_fp16, y = v4_5_cast_fp16)[name = tensor("out_9_cast_fp16")]; tensor var_503_perm_0 = const()[name = tensor("op_503_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_504 = const()[name = tensor("op_504"), val = tensor([1, 1, -1])]; tensor var_503_cast_fp16 = transpose(perm = var_503_perm_0, x = out_9_cast_fp16)[name = tensor("transpose_216")]; tensor input_47_cast_fp16 = reshape(shape = var_504, x = var_503_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36592832)))]; tensor linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1p+0)]; tensor var_509 = add(x = position2, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; tensor input_49_cast_fp16 = add(x = input_43_cast_fp16, y = linear_11_cast_fp16)[name = tensor("input_49_cast_fp16")]; tensor x_23_axes_0 = const()[name = tensor("x_23_axes_0"), val = tensor([-1])]; tensor layers_2_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_2_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37772544)))]; tensor x_23_cast_fp16 = layer_norm(axes = x_23_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_xa_query_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("x_23_cast_fp16")]; tensor memory_5_axes_0 = const()[name = tensor("memory_5_axes_0"), val = tensor([-1])]; tensor layers_2_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_2_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37774144)))]; tensor memory_5_cast_fp16 = layer_norm(axes = memory_5_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_5_cast_fp16")]; tensor layers_2_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37775744)))]; tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_2_cross_attn_q_proj_weight_to_fp16, x = x_23_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_530 = const()[name = tensor("op_530"), val = tensor([1, 1, 1, 128])]; tensor var_531_cast_fp16 = reshape(shape = var_530, x = linear_12_cast_fp16)[name = tensor("op_531_cast_fp16")]; tensor layers_2_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_2_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37972416)))]; tensor linear_13_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_2_cross_attn_kv_proj_weight_to_fp16, x = memory_5_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor var_535 = const()[name = tensor("op_535"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_5_cast_fp16 = reshape(shape = var_535, x = linear_13_cast_fp16)[name = tensor("kv_5_cast_fp16")]; tensor var_539_begin_0 = const()[name = tensor("op_539_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_539_end_0 = const()[name = tensor("op_539_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_539_end_mask_0 = const()[name = tensor("op_539_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_539_squeeze_mask_0 = const()[name = tensor("op_539_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_539_cast_fp16 = slice_by_index(begin = var_539_begin_0, end = var_539_end_0, end_mask = var_539_end_mask_0, squeeze_mask = var_539_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor("op_539_cast_fp16")]; tensor var_543_begin_0 = const()[name = tensor("op_543_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_543_end_0 = const()[name = tensor("op_543_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_543_end_mask_0 = const()[name = tensor("op_543_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_543_squeeze_mask_0 = const()[name = tensor("op_543_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_543_cast_fp16 = slice_by_index(begin = var_543_begin_0, end = var_543_end_0, end_mask = var_543_end_mask_0, squeeze_mask = var_543_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor("op_543_cast_fp16")]; tensor v_11_perm_0 = const()[name = tensor("v_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_546_transpose_x_0 = const()[name = tensor("op_546_transpose_x_0"), val = tensor(false)]; tensor var_546_transpose_y_0 = const()[name = tensor("op_546_transpose_y_0"), val = tensor(false)]; tensor transpose_82_perm_0 = const()[name = tensor("transpose_82_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_83_perm_0 = const()[name = tensor("transpose_83_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_83 = transpose(perm = transpose_83_perm_0, x = var_539_cast_fp16)[name = tensor("transpose_213")]; tensor transpose_82 = transpose(perm = transpose_82_perm_0, x = var_531_cast_fp16)[name = tensor("transpose_214")]; tensor var_546_cast_fp16 = matmul(transpose_x = var_546_transpose_x_0, transpose_y = var_546_transpose_y_0, x = transpose_82, y = transpose_83)[name = tensor("op_546_cast_fp16")]; tensor var_547_to_fp16 = const()[name = tensor("op_547_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_21_cast_fp16 = mul(x = var_546_cast_fp16, y = var_547_to_fp16)[name = tensor("attn_21_cast_fp16")]; tensor input_51_cast_fp16 = add(x = attn_21_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor attn_23_cast_fp16 = softmax(axis = var_434, x = input_51_cast_fp16)[name = tensor("attn_23_cast_fp16")]; tensor out_11_transpose_x_0 = const()[name = tensor("out_11_transpose_x_0"), val = tensor(false)]; tensor out_11_transpose_y_0 = const()[name = tensor("out_11_transpose_y_0"), val = tensor(false)]; tensor v_11_cast_fp16 = transpose(perm = v_11_perm_0, x = var_543_cast_fp16)[name = tensor("transpose_215")]; tensor out_11_cast_fp16 = matmul(transpose_x = out_11_transpose_x_0, transpose_y = out_11_transpose_y_0, x = attn_23_cast_fp16, y = v_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; tensor var_558_perm_0 = const()[name = tensor("op_558_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_559 = const()[name = tensor("op_559"), val = tensor([1, 1, -1])]; tensor var_558_cast_fp16 = transpose(perm = var_558_perm_0, x = out_11_cast_fp16)[name = tensor("transpose_212")]; tensor input_53_cast_fp16 = reshape(shape = var_559, x = var_558_cast_fp16)[name = tensor("input_53_cast_fp16")]; tensor layers_2_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38365696)))]; tensor linear_14_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_cross_attn_o_proj_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor input_55_cast_fp16 = add(x = input_49_cast_fp16, y = linear_14_cast_fp16)[name = tensor("input_55_cast_fp16")]; tensor x_25_axes_0 = const()[name = tensor("x_25_axes_0"), val = tensor([-1])]; tensor layers_2_norm_ff_weight_to_fp16 = const()[name = tensor("layers_2_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38562368)))]; tensor x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_ff_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("x_25_cast_fp16")]; tensor input_57_perm_0 = const()[name = tensor("input_57_perm_0"), val = tensor([0, 2, 1])]; tensor input_59_pad_type_0 = const()[name = tensor("input_59_pad_type_0"), val = tensor("valid")]; tensor input_59_strides_0 = const()[name = tensor("input_59_strides_0"), val = tensor([1])]; tensor input_59_pad_0 = const()[name = tensor("input_59_pad_0"), val = tensor([0, 0])]; tensor input_59_dilations_0 = const()[name = tensor("input_59_dilations_0"), val = tensor([1])]; tensor input_59_groups_0 = const()[name = tensor("input_59_groups_0"), val = tensor(1)]; tensor layers_2_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_2_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38563968)))]; tensor input_57_cast_fp16 = transpose(perm = input_57_perm_0, x = x_25_cast_fp16)[name = tensor("transpose_211")]; tensor input_59_cast_fp16 = conv(dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = layers_2_ffn_conv1_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor input_61_mode_0 = const()[name = tensor("input_61_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor x_27_pad_type_0 = const()[name = tensor("x_27_pad_type_0"), val = tensor("valid")]; tensor x_27_strides_0 = const()[name = tensor("x_27_strides_0"), val = tensor([1])]; tensor x_27_pad_0 = const()[name = tensor("x_27_pad_0"), val = tensor([0, 0])]; tensor x_27_dilations_0 = const()[name = tensor("x_27_dilations_0"), val = tensor([1])]; tensor x_27_groups_0 = const()[name = tensor("x_27_groups_0"), val = tensor(1)]; tensor layers_2_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_2_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43282624)))]; tensor x_27_cast_fp16 = conv(dilations = x_27_dilations_0, groups = x_27_groups_0, pad = x_27_pad_0, pad_type = x_27_pad_type_0, strides = x_27_strides_0, weight = layers_2_ffn_conv2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("x_27_cast_fp16")]; tensor x_29_perm_0 = const()[name = tensor("x_29_perm_0"), val = tensor([0, 2, 1])]; tensor x_29_cast_fp16 = transpose(perm = x_29_perm_0, x = x_27_cast_fp16)[name = tensor("transpose_210")]; tensor input_63_cast_fp16 = add(x = input_55_cast_fp16, y = x_29_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor var_604 = const()[name = tensor("op_604"), val = tensor(-1)]; tensor x_31_axes_0 = const()[name = tensor("x_31_axes_0"), val = tensor([-1])]; tensor layers_3_norm_sa_weight_to_fp16 = const()[name = tensor("layers_3_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48001280)))]; tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_31_cast_fp16 = layer_norm(axes = x_31_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_sa_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("x_31_cast_fp16")]; tensor layers_3_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48002880)))]; tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_qkv_proj_weight_to_fp16, x = x_31_cast_fp16)[name = tensor("linear_15_cast_fp16")]; tensor var_626 = const()[name = tensor("op_626"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_15_cast_fp16 = reshape(shape = var_626, x = linear_15_cast_fp16)[name = tensor("qkv_15_cast_fp16")]; tensor q_13_begin_0 = const()[name = tensor("q_13_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_13_end_0 = const()[name = tensor("q_13_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_13_end_mask_0 = const()[name = tensor("q_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_13_squeeze_mask_0 = const()[name = tensor("q_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("q_13_cast_fp16")]; tensor k_13_begin_0 = const()[name = tensor("k_13_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_13_end_0 = const()[name = tensor("k_13_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_13_end_mask_0 = const()[name = tensor("k_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_13_squeeze_mask_0 = const()[name = tensor("k_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_13_cast_fp16 = slice_by_index(begin = k_13_begin_0, end = k_13_end_0, end_mask = k_13_end_mask_0, squeeze_mask = k_13_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("k_13_cast_fp16")]; tensor v_13_begin_0 = const()[name = tensor("v_13_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_13_end_0 = const()[name = tensor("v_13_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_13_end_mask_0 = const()[name = tensor("v_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_13_squeeze_mask_0 = const()[name = tensor("v_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_13_cast_fp16 = slice_by_index(begin = v_13_begin_0, end = v_13_end_0, end_mask = v_13_end_mask_0, squeeze_mask = v_13_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("v_13_cast_fp16")]; tensor var_638_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position3)[name = tensor("op_638_cast_fp16")]; tensor var_640 = const()[name = tensor("op_640"), val = tensor([1, 512, 1, 1])]; tensor var_639_to_fp16_dtype_0 = const()[name = tensor("op_639_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_638_cast_fp16_to_fp16 = cast(dtype = var_639_to_fp16_dtype_0, x = var_638_cast_fp16)[name = tensor("cast_179")]; tensor mask_7_cast_fp16 = reshape(shape = var_640, x = var_638_cast_fp16_to_fp16)[name = tensor("mask_7_cast_fp16")]; tensor k_new_7_reps_0 = const()[name = tensor("k_new_7_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_7_cast_fp16 = tile(reps = k_new_7_reps_0, x = k_13_cast_fp16)[name = tensor("k_new_7_cast_fp16")]; tensor v_new_7_reps_0 = const()[name = tensor("v_new_7_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_7_cast_fp16 = tile(reps = v_new_7_reps_0, x = v_13_cast_fp16)[name = tensor("v_new_7_cast_fp16")]; tensor var_602_to_fp16 = const()[name = tensor("op_602_to_fp16"), val = tensor(0x1p+0)]; tensor var_646_cast_fp16 = sub(x = var_602_to_fp16, y = mask_7_cast_fp16)[name = tensor("op_646_cast_fp16")]; tensor var_647_cast_fp16 = mul(x = cache_k3, y = var_646_cast_fp16)[name = tensor("op_647_cast_fp16")]; tensor var_648_cast_fp16 = mul(x = k_new_7_cast_fp16, y = mask_7_cast_fp16)[name = tensor("op_648_cast_fp16")]; tensor new_k_7 = add(x = var_647_cast_fp16, y = var_648_cast_fp16)[name = tensor("new_k_7_cast_fp16")]; tensor var_651_cast_fp16 = mul(x = cache_v3, y = var_646_cast_fp16)[name = tensor("op_651_cast_fp16")]; tensor var_652_cast_fp16 = mul(x = v_new_7_cast_fp16, y = mask_7_cast_fp16)[name = tensor("op_652_cast_fp16")]; tensor new_v_7 = add(x = var_651_cast_fp16, y = var_652_cast_fp16)[name = tensor("new_v_7_cast_fp16")]; tensor var_654_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position3)[name = tensor("op_654_cast_fp16")]; tensor var_656 = const()[name = tensor("op_656"), val = tensor([1, 1, 1, 512])]; tensor var_655_to_fp16_dtype_0 = const()[name = tensor("op_655_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_654_cast_fp16_to_fp16 = cast(dtype = var_655_to_fp16_dtype_0, x = var_654_cast_fp16)[name = tensor("cast_178")]; tensor var_657_cast_fp16 = reshape(shape = var_656, x = var_654_cast_fp16_to_fp16)[name = tensor("op_657_cast_fp16")]; tensor var_661 = const()[name = tensor("op_661"), val = tensor([0, 2, 1, 3])]; tensor var_664_transpose_x_0 = const()[name = tensor("op_664_transpose_x_0"), val = tensor(false)]; tensor var_664_transpose_y_0 = const()[name = tensor("op_664_transpose_y_0"), val = tensor(false)]; tensor transpose_84_perm_0 = const()[name = tensor("transpose_84_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_85_perm_0 = const()[name = tensor("transpose_85_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_85 = transpose(perm = transpose_85_perm_0, x = new_k_7)[name = tensor("transpose_207")]; tensor transpose_84 = transpose(perm = transpose_84_perm_0, x = q_13_cast_fp16)[name = tensor("transpose_208")]; tensor var_664_cast_fp16 = matmul(transpose_x = var_664_transpose_x_0, transpose_y = var_664_transpose_y_0, x = transpose_84, y = transpose_85)[name = tensor("op_664_cast_fp16")]; tensor var_665_to_fp16 = const()[name = tensor("op_665_to_fp16"), val = tensor(0x1p-3)]; tensor attn_25_cast_fp16 = mul(x = var_664_cast_fp16, y = var_665_to_fp16)[name = tensor("attn_25_cast_fp16")]; tensor var_667_cast_fp16 = sub(x = var_602_to_fp16, y = var_657_cast_fp16)[name = tensor("op_667_cast_fp16")]; tensor var_668_to_fp16 = const()[name = tensor("op_668_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_669_cast_fp16 = mul(x = var_667_cast_fp16, y = var_668_to_fp16)[name = tensor("op_669_cast_fp16")]; tensor input_65_cast_fp16 = add(x = attn_25_cast_fp16, y = var_669_cast_fp16)[name = tensor("input_65_cast_fp16")]; tensor attn_27_cast_fp16 = softmax(axis = var_604, x = input_65_cast_fp16)[name = tensor("attn_27_cast_fp16")]; tensor out_13_transpose_x_0 = const()[name = tensor("out_13_transpose_x_0"), val = tensor(false)]; tensor out_13_transpose_y_0 = const()[name = tensor("out_13_transpose_y_0"), val = tensor(false)]; tensor v4_7_cast_fp16 = transpose(perm = var_661, x = new_v_7)[name = tensor("transpose_209")]; tensor out_13_cast_fp16 = matmul(transpose_x = out_13_transpose_x_0, transpose_y = out_13_transpose_y_0, x = attn_27_cast_fp16, y = v4_7_cast_fp16)[name = tensor("out_13_cast_fp16")]; tensor var_673_perm_0 = const()[name = tensor("op_673_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 1, -1])]; tensor var_673_cast_fp16 = transpose(perm = var_673_perm_0, x = out_13_cast_fp16)[name = tensor("transpose_206")]; tensor input_67_cast_fp16 = reshape(shape = var_674, x = var_673_cast_fp16)[name = tensor("input_67_cast_fp16")]; tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51541888)))]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor var_678_to_fp16 = const()[name = tensor("op_678_to_fp16"), val = tensor(0x1p+0)]; tensor var_679 = add(x = position3, y = var_678_to_fp16)[name = tensor("op_679_cast_fp16")]; tensor input_69_cast_fp16 = add(x = input_63_cast_fp16, y = linear_16_cast_fp16)[name = tensor("input_69_cast_fp16")]; tensor x_33_axes_0 = const()[name = tensor("x_33_axes_0"), val = tensor([-1])]; tensor layers_3_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_3_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52721600)))]; tensor x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_xa_query_weight_to_fp16, x = input_69_cast_fp16)[name = tensor("x_33_cast_fp16")]; tensor memory_7_axes_0 = const()[name = tensor("memory_7_axes_0"), val = tensor([-1])]; tensor layers_3_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_3_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52723200)))]; tensor memory_7_cast_fp16 = layer_norm(axes = memory_7_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_7_cast_fp16")]; tensor layers_3_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52724800)))]; tensor linear_17_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_3_cross_attn_q_proj_weight_to_fp16, x = x_33_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor var_700 = const()[name = tensor("op_700"), val = tensor([1, 1, 1, 128])]; tensor var_701_cast_fp16 = reshape(shape = var_700, x = linear_17_cast_fp16)[name = tensor("op_701_cast_fp16")]; tensor layers_3_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_3_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52921472)))]; tensor linear_18_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_3_cross_attn_kv_proj_weight_to_fp16, x = memory_7_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_705 = const()[name = tensor("op_705"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_7_cast_fp16 = reshape(shape = var_705, x = linear_18_cast_fp16)[name = tensor("kv_7_cast_fp16")]; tensor var_709_begin_0 = const()[name = tensor("op_709_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_709_end_0 = const()[name = tensor("op_709_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_709_end_mask_0 = const()[name = tensor("op_709_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_709_squeeze_mask_0 = const()[name = tensor("op_709_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_709_cast_fp16 = slice_by_index(begin = var_709_begin_0, end = var_709_end_0, end_mask = var_709_end_mask_0, squeeze_mask = var_709_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor("op_709_cast_fp16")]; tensor var_713_begin_0 = const()[name = tensor("op_713_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_713_end_0 = const()[name = tensor("op_713_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_713_end_mask_0 = const()[name = tensor("op_713_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_713_squeeze_mask_0 = const()[name = tensor("op_713_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_713_cast_fp16 = slice_by_index(begin = var_713_begin_0, end = var_713_end_0, end_mask = var_713_end_mask_0, squeeze_mask = var_713_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor("op_713_cast_fp16")]; tensor v_15_perm_0 = const()[name = tensor("v_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_716_transpose_x_0 = const()[name = tensor("op_716_transpose_x_0"), val = tensor(false)]; tensor var_716_transpose_y_0 = const()[name = tensor("op_716_transpose_y_0"), val = tensor(false)]; tensor transpose_86_perm_0 = const()[name = tensor("transpose_86_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_87_perm_0 = const()[name = tensor("transpose_87_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_87 = transpose(perm = transpose_87_perm_0, x = var_709_cast_fp16)[name = tensor("transpose_203")]; tensor transpose_86 = transpose(perm = transpose_86_perm_0, x = var_701_cast_fp16)[name = tensor("transpose_204")]; tensor var_716_cast_fp16 = matmul(transpose_x = var_716_transpose_x_0, transpose_y = var_716_transpose_y_0, x = transpose_86, y = transpose_87)[name = tensor("op_716_cast_fp16")]; tensor var_717_to_fp16 = const()[name = tensor("op_717_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_29_cast_fp16 = mul(x = var_716_cast_fp16, y = var_717_to_fp16)[name = tensor("attn_29_cast_fp16")]; tensor input_71_cast_fp16 = add(x = attn_29_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor attn_31_cast_fp16 = softmax(axis = var_604, x = input_71_cast_fp16)[name = tensor("attn_31_cast_fp16")]; tensor out_15_transpose_x_0 = const()[name = tensor("out_15_transpose_x_0"), val = tensor(false)]; tensor out_15_transpose_y_0 = const()[name = tensor("out_15_transpose_y_0"), val = tensor(false)]; tensor v_15_cast_fp16 = transpose(perm = v_15_perm_0, x = var_713_cast_fp16)[name = tensor("transpose_205")]; tensor out_15_cast_fp16 = matmul(transpose_x = out_15_transpose_x_0, transpose_y = out_15_transpose_y_0, x = attn_31_cast_fp16, y = v_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; tensor var_728_perm_0 = const()[name = tensor("op_728_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_729 = const()[name = tensor("op_729"), val = tensor([1, 1, -1])]; tensor var_728_cast_fp16 = transpose(perm = var_728_perm_0, x = out_15_cast_fp16)[name = tensor("transpose_202")]; tensor input_73_cast_fp16 = reshape(shape = var_729, x = var_728_cast_fp16)[name = tensor("input_73_cast_fp16")]; tensor layers_3_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53314752)))]; tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_cross_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor input_75_cast_fp16 = add(x = input_69_cast_fp16, y = linear_19_cast_fp16)[name = tensor("input_75_cast_fp16")]; tensor x_35_axes_0 = const()[name = tensor("x_35_axes_0"), val = tensor([-1])]; tensor layers_3_norm_ff_weight_to_fp16 = const()[name = tensor("layers_3_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53511424)))]; tensor x_35_cast_fp16 = layer_norm(axes = x_35_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_ff_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("x_35_cast_fp16")]; tensor input_77_perm_0 = const()[name = tensor("input_77_perm_0"), val = tensor([0, 2, 1])]; tensor input_79_pad_type_0 = const()[name = tensor("input_79_pad_type_0"), val = tensor("valid")]; tensor input_79_strides_0 = const()[name = tensor("input_79_strides_0"), val = tensor([1])]; tensor input_79_pad_0 = const()[name = tensor("input_79_pad_0"), val = tensor([0, 0])]; tensor input_79_dilations_0 = const()[name = tensor("input_79_dilations_0"), val = tensor([1])]; tensor input_79_groups_0 = const()[name = tensor("input_79_groups_0"), val = tensor(1)]; tensor layers_3_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_3_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53513024)))]; tensor input_77_cast_fp16 = transpose(perm = input_77_perm_0, x = x_35_cast_fp16)[name = tensor("transpose_201")]; tensor input_79_cast_fp16 = conv(dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = layers_3_ffn_conv1_weight_to_fp16, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor input_81_mode_0 = const()[name = tensor("input_81_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor("input_81_cast_fp16")]; tensor x_37_pad_type_0 = const()[name = tensor("x_37_pad_type_0"), val = tensor("valid")]; tensor x_37_strides_0 = const()[name = tensor("x_37_strides_0"), val = tensor([1])]; tensor x_37_pad_0 = const()[name = tensor("x_37_pad_0"), val = tensor([0, 0])]; tensor x_37_dilations_0 = const()[name = tensor("x_37_dilations_0"), val = tensor([1])]; tensor x_37_groups_0 = const()[name = tensor("x_37_groups_0"), val = tensor(1)]; tensor layers_3_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_3_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58231680)))]; tensor x_37_cast_fp16 = conv(dilations = x_37_dilations_0, groups = x_37_groups_0, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = x_37_strides_0, weight = layers_3_ffn_conv2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("x_37_cast_fp16")]; tensor x_39_perm_0 = const()[name = tensor("x_39_perm_0"), val = tensor([0, 2, 1])]; tensor x_39_cast_fp16 = transpose(perm = x_39_perm_0, x = x_37_cast_fp16)[name = tensor("transpose_200")]; tensor input_83_cast_fp16 = add(x = input_75_cast_fp16, y = x_39_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor var_774 = const()[name = tensor("op_774"), val = tensor(-1)]; tensor x_41_axes_0 = const()[name = tensor("x_41_axes_0"), val = tensor([-1])]; tensor layers_4_norm_sa_weight_to_fp16 = const()[name = tensor("layers_4_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62950336)))]; tensor var_777_to_fp16 = const()[name = tensor("op_777_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_sa_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("x_41_cast_fp16")]; tensor layers_4_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62951936)))]; tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_qkv_proj_weight_to_fp16, x = x_41_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_796 = const()[name = tensor("op_796"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_19_cast_fp16 = reshape(shape = var_796, x = linear_20_cast_fp16)[name = tensor("qkv_19_cast_fp16")]; tensor q_17_begin_0 = const()[name = tensor("q_17_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_17_end_0 = const()[name = tensor("q_17_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_17_end_mask_0 = const()[name = tensor("q_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_17_squeeze_mask_0 = const()[name = tensor("q_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("q_17_cast_fp16")]; tensor k_17_begin_0 = const()[name = tensor("k_17_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_17_end_0 = const()[name = tensor("k_17_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_17_end_mask_0 = const()[name = tensor("k_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_17_squeeze_mask_0 = const()[name = tensor("k_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_17_cast_fp16 = slice_by_index(begin = k_17_begin_0, end = k_17_end_0, end_mask = k_17_end_mask_0, squeeze_mask = k_17_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("k_17_cast_fp16")]; tensor v_17_begin_0 = const()[name = tensor("v_17_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_17_end_0 = const()[name = tensor("v_17_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_17_end_mask_0 = const()[name = tensor("v_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_17_squeeze_mask_0 = const()[name = tensor("v_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_17_cast_fp16 = slice_by_index(begin = v_17_begin_0, end = v_17_end_0, end_mask = v_17_end_mask_0, squeeze_mask = v_17_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("v_17_cast_fp16")]; tensor var_808_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position4)[name = tensor("op_808_cast_fp16")]; tensor var_810 = const()[name = tensor("op_810"), val = tensor([1, 512, 1, 1])]; tensor var_809_to_fp16_dtype_0 = const()[name = tensor("op_809_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_808_cast_fp16_to_fp16 = cast(dtype = var_809_to_fp16_dtype_0, x = var_808_cast_fp16)[name = tensor("cast_177")]; tensor mask_9_cast_fp16 = reshape(shape = var_810, x = var_808_cast_fp16_to_fp16)[name = tensor("mask_9_cast_fp16")]; tensor k_new_9_reps_0 = const()[name = tensor("k_new_9_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_9_cast_fp16 = tile(reps = k_new_9_reps_0, x = k_17_cast_fp16)[name = tensor("k_new_9_cast_fp16")]; tensor v_new_9_reps_0 = const()[name = tensor("v_new_9_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_9_cast_fp16 = tile(reps = v_new_9_reps_0, x = v_17_cast_fp16)[name = tensor("v_new_9_cast_fp16")]; tensor var_772_to_fp16 = const()[name = tensor("op_772_to_fp16"), val = tensor(0x1p+0)]; tensor var_816_cast_fp16 = sub(x = var_772_to_fp16, y = mask_9_cast_fp16)[name = tensor("op_816_cast_fp16")]; tensor var_817_cast_fp16 = mul(x = cache_k4, y = var_816_cast_fp16)[name = tensor("op_817_cast_fp16")]; tensor var_818_cast_fp16 = mul(x = k_new_9_cast_fp16, y = mask_9_cast_fp16)[name = tensor("op_818_cast_fp16")]; tensor new_k_9 = add(x = var_817_cast_fp16, y = var_818_cast_fp16)[name = tensor("new_k_9_cast_fp16")]; tensor var_821_cast_fp16 = mul(x = cache_v4, y = var_816_cast_fp16)[name = tensor("op_821_cast_fp16")]; tensor var_822_cast_fp16 = mul(x = v_new_9_cast_fp16, y = mask_9_cast_fp16)[name = tensor("op_822_cast_fp16")]; tensor new_v_9 = add(x = var_821_cast_fp16, y = var_822_cast_fp16)[name = tensor("new_v_9_cast_fp16")]; tensor var_824_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position4)[name = tensor("op_824_cast_fp16")]; tensor var_826 = const()[name = tensor("op_826"), val = tensor([1, 1, 1, 512])]; tensor var_825_to_fp16_dtype_0 = const()[name = tensor("op_825_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_824_cast_fp16_to_fp16 = cast(dtype = var_825_to_fp16_dtype_0, x = var_824_cast_fp16)[name = tensor("cast_176")]; tensor var_827_cast_fp16 = reshape(shape = var_826, x = var_824_cast_fp16_to_fp16)[name = tensor("op_827_cast_fp16")]; tensor var_831 = const()[name = tensor("op_831"), val = tensor([0, 2, 1, 3])]; tensor var_834_transpose_x_0 = const()[name = tensor("op_834_transpose_x_0"), val = tensor(false)]; tensor var_834_transpose_y_0 = const()[name = tensor("op_834_transpose_y_0"), val = tensor(false)]; tensor transpose_88_perm_0 = const()[name = tensor("transpose_88_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_89_perm_0 = const()[name = tensor("transpose_89_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_89 = transpose(perm = transpose_89_perm_0, x = new_k_9)[name = tensor("transpose_197")]; tensor transpose_88 = transpose(perm = transpose_88_perm_0, x = q_17_cast_fp16)[name = tensor("transpose_198")]; tensor var_834_cast_fp16 = matmul(transpose_x = var_834_transpose_x_0, transpose_y = var_834_transpose_y_0, x = transpose_88, y = transpose_89)[name = tensor("op_834_cast_fp16")]; tensor var_835_to_fp16 = const()[name = tensor("op_835_to_fp16"), val = tensor(0x1p-3)]; tensor attn_33_cast_fp16 = mul(x = var_834_cast_fp16, y = var_835_to_fp16)[name = tensor("attn_33_cast_fp16")]; tensor var_837_cast_fp16 = sub(x = var_772_to_fp16, y = var_827_cast_fp16)[name = tensor("op_837_cast_fp16")]; tensor var_838_to_fp16 = const()[name = tensor("op_838_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_839_cast_fp16 = mul(x = var_837_cast_fp16, y = var_838_to_fp16)[name = tensor("op_839_cast_fp16")]; tensor input_85_cast_fp16 = add(x = attn_33_cast_fp16, y = var_839_cast_fp16)[name = tensor("input_85_cast_fp16")]; tensor attn_35_cast_fp16 = softmax(axis = var_774, x = input_85_cast_fp16)[name = tensor("attn_35_cast_fp16")]; tensor out_17_transpose_x_0 = const()[name = tensor("out_17_transpose_x_0"), val = tensor(false)]; tensor out_17_transpose_y_0 = const()[name = tensor("out_17_transpose_y_0"), val = tensor(false)]; tensor v4_9_cast_fp16 = transpose(perm = var_831, x = new_v_9)[name = tensor("transpose_199")]; tensor out_17_cast_fp16 = matmul(transpose_x = out_17_transpose_x_0, transpose_y = out_17_transpose_y_0, x = attn_35_cast_fp16, y = v4_9_cast_fp16)[name = tensor("out_17_cast_fp16")]; tensor var_843_perm_0 = const()[name = tensor("op_843_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1, -1])]; tensor var_843_cast_fp16 = transpose(perm = var_843_perm_0, x = out_17_cast_fp16)[name = tensor("transpose_196")]; tensor input_87_cast_fp16 = reshape(shape = var_844, x = var_843_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66490944)))]; tensor linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor var_848_to_fp16 = const()[name = tensor("op_848_to_fp16"), val = tensor(0x1p+0)]; tensor var_849 = add(x = position4, y = var_848_to_fp16)[name = tensor("op_849_cast_fp16")]; tensor input_89_cast_fp16 = add(x = input_83_cast_fp16, y = linear_21_cast_fp16)[name = tensor("input_89_cast_fp16")]; tensor x_43_axes_0 = const()[name = tensor("x_43_axes_0"), val = tensor([-1])]; tensor layers_4_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_4_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67670656)))]; tensor x_43_cast_fp16 = layer_norm(axes = x_43_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_xa_query_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("x_43_cast_fp16")]; tensor memory_9_axes_0 = const()[name = tensor("memory_9_axes_0"), val = tensor([-1])]; tensor layers_4_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_4_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67672256)))]; tensor memory_9_cast_fp16 = layer_norm(axes = memory_9_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_9_cast_fp16")]; tensor layers_4_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67673856)))]; tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_4_cross_attn_q_proj_weight_to_fp16, x = x_43_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor var_870 = const()[name = tensor("op_870"), val = tensor([1, 1, 1, 128])]; tensor var_871_cast_fp16 = reshape(shape = var_870, x = linear_22_cast_fp16)[name = tensor("op_871_cast_fp16")]; tensor layers_4_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_4_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67870528)))]; tensor linear_23_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_4_cross_attn_kv_proj_weight_to_fp16, x = memory_9_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor var_875 = const()[name = tensor("op_875"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_9_cast_fp16 = reshape(shape = var_875, x = linear_23_cast_fp16)[name = tensor("kv_9_cast_fp16")]; tensor var_879_begin_0 = const()[name = tensor("op_879_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_879_end_0 = const()[name = tensor("op_879_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_879_end_mask_0 = const()[name = tensor("op_879_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_879_squeeze_mask_0 = const()[name = tensor("op_879_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_879_cast_fp16 = slice_by_index(begin = var_879_begin_0, end = var_879_end_0, end_mask = var_879_end_mask_0, squeeze_mask = var_879_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor("op_879_cast_fp16")]; tensor var_883_begin_0 = const()[name = tensor("op_883_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_883_end_0 = const()[name = tensor("op_883_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_883_end_mask_0 = const()[name = tensor("op_883_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_883_squeeze_mask_0 = const()[name = tensor("op_883_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_883_cast_fp16 = slice_by_index(begin = var_883_begin_0, end = var_883_end_0, end_mask = var_883_end_mask_0, squeeze_mask = var_883_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor("op_883_cast_fp16")]; tensor v_19_perm_0 = const()[name = tensor("v_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_886_transpose_x_0 = const()[name = tensor("op_886_transpose_x_0"), val = tensor(false)]; tensor var_886_transpose_y_0 = const()[name = tensor("op_886_transpose_y_0"), val = tensor(false)]; tensor transpose_90_perm_0 = const()[name = tensor("transpose_90_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_91_perm_0 = const()[name = tensor("transpose_91_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_91 = transpose(perm = transpose_91_perm_0, x = var_879_cast_fp16)[name = tensor("transpose_193")]; tensor transpose_90 = transpose(perm = transpose_90_perm_0, x = var_871_cast_fp16)[name = tensor("transpose_194")]; tensor var_886_cast_fp16 = matmul(transpose_x = var_886_transpose_x_0, transpose_y = var_886_transpose_y_0, x = transpose_90, y = transpose_91)[name = tensor("op_886_cast_fp16")]; tensor var_887_to_fp16 = const()[name = tensor("op_887_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_37_cast_fp16 = mul(x = var_886_cast_fp16, y = var_887_to_fp16)[name = tensor("attn_37_cast_fp16")]; tensor input_91_cast_fp16 = add(x = attn_37_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor attn_39_cast_fp16 = softmax(axis = var_774, x = input_91_cast_fp16)[name = tensor("attn_39_cast_fp16")]; tensor out_19_transpose_x_0 = const()[name = tensor("out_19_transpose_x_0"), val = tensor(false)]; tensor out_19_transpose_y_0 = const()[name = tensor("out_19_transpose_y_0"), val = tensor(false)]; tensor v_19_cast_fp16 = transpose(perm = v_19_perm_0, x = var_883_cast_fp16)[name = tensor("transpose_195")]; tensor out_19_cast_fp16 = matmul(transpose_x = out_19_transpose_x_0, transpose_y = out_19_transpose_y_0, x = attn_39_cast_fp16, y = v_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; tensor var_898_perm_0 = const()[name = tensor("op_898_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_899 = const()[name = tensor("op_899"), val = tensor([1, 1, -1])]; tensor var_898_cast_fp16 = transpose(perm = var_898_perm_0, x = out_19_cast_fp16)[name = tensor("transpose_192")]; tensor input_93_cast_fp16 = reshape(shape = var_899, x = var_898_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor layers_4_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68263808)))]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_cross_attn_o_proj_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("linear_24_cast_fp16")]; tensor input_95_cast_fp16 = add(x = input_89_cast_fp16, y = linear_24_cast_fp16)[name = tensor("input_95_cast_fp16")]; tensor x_45_axes_0 = const()[name = tensor("x_45_axes_0"), val = tensor([-1])]; tensor layers_4_norm_ff_weight_to_fp16 = const()[name = tensor("layers_4_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68460480)))]; tensor x_45_cast_fp16 = layer_norm(axes = x_45_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_ff_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("x_45_cast_fp16")]; tensor input_97_perm_0 = const()[name = tensor("input_97_perm_0"), val = tensor([0, 2, 1])]; tensor input_99_pad_type_0 = const()[name = tensor("input_99_pad_type_0"), val = tensor("valid")]; tensor input_99_strides_0 = const()[name = tensor("input_99_strides_0"), val = tensor([1])]; tensor input_99_pad_0 = const()[name = tensor("input_99_pad_0"), val = tensor([0, 0])]; tensor input_99_dilations_0 = const()[name = tensor("input_99_dilations_0"), val = tensor([1])]; tensor input_99_groups_0 = const()[name = tensor("input_99_groups_0"), val = tensor(1)]; tensor layers_4_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_4_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68462080)))]; tensor input_97_cast_fp16 = transpose(perm = input_97_perm_0, x = x_45_cast_fp16)[name = tensor("transpose_191")]; tensor input_99_cast_fp16 = conv(dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = layers_4_ffn_conv1_weight_to_fp16, x = input_97_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor input_101_mode_0 = const()[name = tensor("input_101_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor("input_101_cast_fp16")]; tensor x_47_pad_type_0 = const()[name = tensor("x_47_pad_type_0"), val = tensor("valid")]; tensor x_47_strides_0 = const()[name = tensor("x_47_strides_0"), val = tensor([1])]; tensor x_47_pad_0 = const()[name = tensor("x_47_pad_0"), val = tensor([0, 0])]; tensor x_47_dilations_0 = const()[name = tensor("x_47_dilations_0"), val = tensor([1])]; tensor x_47_groups_0 = const()[name = tensor("x_47_groups_0"), val = tensor(1)]; tensor layers_4_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_4_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73180736)))]; tensor x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = layers_4_ffn_conv2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("x_47_cast_fp16")]; tensor x_49_perm_0 = const()[name = tensor("x_49_perm_0"), val = tensor([0, 2, 1])]; tensor x_49_cast_fp16 = transpose(perm = x_49_perm_0, x = x_47_cast_fp16)[name = tensor("transpose_190")]; tensor input_103_cast_fp16 = add(x = input_95_cast_fp16, y = x_49_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor var_944 = const()[name = tensor("op_944"), val = tensor(-1)]; tensor x_51_axes_0 = const()[name = tensor("x_51_axes_0"), val = tensor([-1])]; tensor layers_5_norm_sa_weight_to_fp16 = const()[name = tensor("layers_5_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77899392)))]; tensor var_947_to_fp16 = const()[name = tensor("op_947_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_51_cast_fp16 = layer_norm(axes = x_51_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_sa_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("x_51_cast_fp16")]; tensor layers_5_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77900992)))]; tensor linear_25_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_qkv_proj_weight_to_fp16, x = x_51_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor var_966 = const()[name = tensor("op_966"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_23_cast_fp16 = reshape(shape = var_966, x = linear_25_cast_fp16)[name = tensor("qkv_23_cast_fp16")]; tensor q_21_begin_0 = const()[name = tensor("q_21_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_21_end_0 = const()[name = tensor("q_21_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_21_end_mask_0 = const()[name = tensor("q_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_21_squeeze_mask_0 = const()[name = tensor("q_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("q_21_cast_fp16")]; tensor k_21_begin_0 = const()[name = tensor("k_21_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_21_end_0 = const()[name = tensor("k_21_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_21_end_mask_0 = const()[name = tensor("k_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_21_squeeze_mask_0 = const()[name = tensor("k_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_21_cast_fp16 = slice_by_index(begin = k_21_begin_0, end = k_21_end_0, end_mask = k_21_end_mask_0, squeeze_mask = k_21_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("k_21_cast_fp16")]; tensor v_21_begin_0 = const()[name = tensor("v_21_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_21_end_0 = const()[name = tensor("v_21_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_21_end_mask_0 = const()[name = tensor("v_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_21_squeeze_mask_0 = const()[name = tensor("v_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_21_cast_fp16 = slice_by_index(begin = v_21_begin_0, end = v_21_end_0, end_mask = v_21_end_mask_0, squeeze_mask = v_21_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("v_21_cast_fp16")]; tensor var_978_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position5)[name = tensor("op_978_cast_fp16")]; tensor var_980 = const()[name = tensor("op_980"), val = tensor([1, 512, 1, 1])]; tensor var_979_to_fp16_dtype_0 = const()[name = tensor("op_979_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_978_cast_fp16_to_fp16 = cast(dtype = var_979_to_fp16_dtype_0, x = var_978_cast_fp16)[name = tensor("cast_175")]; tensor mask_11_cast_fp16 = reshape(shape = var_980, x = var_978_cast_fp16_to_fp16)[name = tensor("mask_11_cast_fp16")]; tensor k_new_11_reps_0 = const()[name = tensor("k_new_11_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_11_cast_fp16 = tile(reps = k_new_11_reps_0, x = k_21_cast_fp16)[name = tensor("k_new_11_cast_fp16")]; tensor v_new_11_reps_0 = const()[name = tensor("v_new_11_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_11_cast_fp16 = tile(reps = v_new_11_reps_0, x = v_21_cast_fp16)[name = tensor("v_new_11_cast_fp16")]; tensor var_942_to_fp16 = const()[name = tensor("op_942_to_fp16"), val = tensor(0x1p+0)]; tensor var_986_cast_fp16 = sub(x = var_942_to_fp16, y = mask_11_cast_fp16)[name = tensor("op_986_cast_fp16")]; tensor var_987_cast_fp16 = mul(x = cache_k5, y = var_986_cast_fp16)[name = tensor("op_987_cast_fp16")]; tensor var_988_cast_fp16 = mul(x = k_new_11_cast_fp16, y = mask_11_cast_fp16)[name = tensor("op_988_cast_fp16")]; tensor new_k_11 = add(x = var_987_cast_fp16, y = var_988_cast_fp16)[name = tensor("new_k_11_cast_fp16")]; tensor var_991_cast_fp16 = mul(x = cache_v5, y = var_986_cast_fp16)[name = tensor("op_991_cast_fp16")]; tensor var_992_cast_fp16 = mul(x = v_new_11_cast_fp16, y = mask_11_cast_fp16)[name = tensor("op_992_cast_fp16")]; tensor new_v_11 = add(x = var_991_cast_fp16, y = var_992_cast_fp16)[name = tensor("new_v_11_cast_fp16")]; tensor var_994_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position5)[name = tensor("op_994_cast_fp16")]; tensor var_996 = const()[name = tensor("op_996"), val = tensor([1, 1, 1, 512])]; tensor var_995_to_fp16_dtype_0 = const()[name = tensor("op_995_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_994_cast_fp16_to_fp16 = cast(dtype = var_995_to_fp16_dtype_0, x = var_994_cast_fp16)[name = tensor("cast_174")]; tensor var_997_cast_fp16 = reshape(shape = var_996, x = var_994_cast_fp16_to_fp16)[name = tensor("op_997_cast_fp16")]; tensor var_1001 = const()[name = tensor("op_1001"), val = tensor([0, 2, 1, 3])]; tensor var_1004_transpose_x_0 = const()[name = tensor("op_1004_transpose_x_0"), val = tensor(false)]; tensor var_1004_transpose_y_0 = const()[name = tensor("op_1004_transpose_y_0"), val = tensor(false)]; tensor transpose_92_perm_0 = const()[name = tensor("transpose_92_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_93_perm_0 = const()[name = tensor("transpose_93_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_93 = transpose(perm = transpose_93_perm_0, x = new_k_11)[name = tensor("transpose_187")]; tensor transpose_92 = transpose(perm = transpose_92_perm_0, x = q_21_cast_fp16)[name = tensor("transpose_188")]; tensor var_1004_cast_fp16 = matmul(transpose_x = var_1004_transpose_x_0, transpose_y = var_1004_transpose_y_0, x = transpose_92, y = transpose_93)[name = tensor("op_1004_cast_fp16")]; tensor var_1005_to_fp16 = const()[name = tensor("op_1005_to_fp16"), val = tensor(0x1p-3)]; tensor attn_41_cast_fp16 = mul(x = var_1004_cast_fp16, y = var_1005_to_fp16)[name = tensor("attn_41_cast_fp16")]; tensor var_1007_cast_fp16 = sub(x = var_942_to_fp16, y = var_997_cast_fp16)[name = tensor("op_1007_cast_fp16")]; tensor var_1008_to_fp16 = const()[name = tensor("op_1008_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_1009_cast_fp16 = mul(x = var_1007_cast_fp16, y = var_1008_to_fp16)[name = tensor("op_1009_cast_fp16")]; tensor input_105_cast_fp16 = add(x = attn_41_cast_fp16, y = var_1009_cast_fp16)[name = tensor("input_105_cast_fp16")]; tensor attn_43_cast_fp16 = softmax(axis = var_944, x = input_105_cast_fp16)[name = tensor("attn_43_cast_fp16")]; tensor out_21_transpose_x_0 = const()[name = tensor("out_21_transpose_x_0"), val = tensor(false)]; tensor out_21_transpose_y_0 = const()[name = tensor("out_21_transpose_y_0"), val = tensor(false)]; tensor v4_11_cast_fp16 = transpose(perm = var_1001, x = new_v_11)[name = tensor("transpose_189")]; tensor out_21_cast_fp16 = matmul(transpose_x = out_21_transpose_x_0, transpose_y = out_21_transpose_y_0, x = attn_43_cast_fp16, y = v4_11_cast_fp16)[name = tensor("out_21_cast_fp16")]; tensor var_1013_perm_0 = const()[name = tensor("op_1013_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1014 = const()[name = tensor("op_1014"), val = tensor([1, 1, -1])]; tensor var_1013_cast_fp16 = transpose(perm = var_1013_perm_0, x = out_21_cast_fp16)[name = tensor("transpose_186")]; tensor input_107_cast_fp16 = reshape(shape = var_1014, x = var_1013_cast_fp16)[name = tensor("input_107_cast_fp16")]; tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81440000)))]; tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_107_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor var_1018_to_fp16 = const()[name = tensor("op_1018_to_fp16"), val = tensor(0x1p+0)]; tensor var_1019 = add(x = position5, y = var_1018_to_fp16)[name = tensor("op_1019_cast_fp16")]; tensor input_109_cast_fp16 = add(x = input_103_cast_fp16, y = linear_26_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor x_53_axes_0 = const()[name = tensor("x_53_axes_0"), val = tensor([-1])]; tensor layers_5_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_5_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82619712)))]; tensor x_53_cast_fp16 = layer_norm(axes = x_53_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_xa_query_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("x_53_cast_fp16")]; tensor memory_11_axes_0 = const()[name = tensor("memory_11_axes_0"), val = tensor([-1])]; tensor layers_5_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_5_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82621312)))]; tensor memory_11_cast_fp16 = layer_norm(axes = memory_11_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_11_cast_fp16")]; tensor layers_5_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82622912)))]; tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_5_cross_attn_q_proj_weight_to_fp16, x = x_53_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor var_1040 = const()[name = tensor("op_1040"), val = tensor([1, 1, 1, 128])]; tensor var_1041_cast_fp16 = reshape(shape = var_1040, x = linear_27_cast_fp16)[name = tensor("op_1041_cast_fp16")]; tensor layers_5_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_5_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82819584)))]; tensor linear_28_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_5_cross_attn_kv_proj_weight_to_fp16, x = memory_11_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor var_1045 = const()[name = tensor("op_1045"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_11_cast_fp16 = reshape(shape = var_1045, x = linear_28_cast_fp16)[name = tensor("kv_11_cast_fp16")]; tensor var_1049_begin_0 = const()[name = tensor("op_1049_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_1049_end_0 = const()[name = tensor("op_1049_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_1049_end_mask_0 = const()[name = tensor("op_1049_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1049_squeeze_mask_0 = const()[name = tensor("op_1049_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1049_cast_fp16 = slice_by_index(begin = var_1049_begin_0, end = var_1049_end_0, end_mask = var_1049_end_mask_0, squeeze_mask = var_1049_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor("op_1049_cast_fp16")]; tensor var_1053_begin_0 = const()[name = tensor("op_1053_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_1053_end_0 = const()[name = tensor("op_1053_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_1053_end_mask_0 = const()[name = tensor("op_1053_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1053_squeeze_mask_0 = const()[name = tensor("op_1053_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1053_cast_fp16 = slice_by_index(begin = var_1053_begin_0, end = var_1053_end_0, end_mask = var_1053_end_mask_0, squeeze_mask = var_1053_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor("op_1053_cast_fp16")]; tensor v_23_perm_0 = const()[name = tensor("v_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1056_transpose_x_0 = const()[name = tensor("op_1056_transpose_x_0"), val = tensor(false)]; tensor var_1056_transpose_y_0 = const()[name = tensor("op_1056_transpose_y_0"), val = tensor(false)]; tensor transpose_94_perm_0 = const()[name = tensor("transpose_94_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_95_perm_0 = const()[name = tensor("transpose_95_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_95 = transpose(perm = transpose_95_perm_0, x = var_1049_cast_fp16)[name = tensor("transpose_183")]; tensor transpose_94 = transpose(perm = transpose_94_perm_0, x = var_1041_cast_fp16)[name = tensor("transpose_184")]; tensor var_1056_cast_fp16 = matmul(transpose_x = var_1056_transpose_x_0, transpose_y = var_1056_transpose_y_0, x = transpose_94, y = transpose_95)[name = tensor("op_1056_cast_fp16")]; tensor var_1057_to_fp16 = const()[name = tensor("op_1057_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_45_cast_fp16 = mul(x = var_1056_cast_fp16, y = var_1057_to_fp16)[name = tensor("attn_45_cast_fp16")]; tensor input_111_cast_fp16 = add(x = attn_45_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor attn_47_cast_fp16 = softmax(axis = var_944, x = input_111_cast_fp16)[name = tensor("attn_47_cast_fp16")]; tensor out_23_transpose_x_0 = const()[name = tensor("out_23_transpose_x_0"), val = tensor(false)]; tensor out_23_transpose_y_0 = const()[name = tensor("out_23_transpose_y_0"), val = tensor(false)]; tensor v_23_cast_fp16 = transpose(perm = v_23_perm_0, x = var_1053_cast_fp16)[name = tensor("transpose_185")]; tensor out_23_cast_fp16 = matmul(transpose_x = out_23_transpose_x_0, transpose_y = out_23_transpose_y_0, x = attn_47_cast_fp16, y = v_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; tensor var_1068_perm_0 = const()[name = tensor("op_1068_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1069 = const()[name = tensor("op_1069"), val = tensor([1, 1, -1])]; tensor var_1068_cast_fp16 = transpose(perm = var_1068_perm_0, x = out_23_cast_fp16)[name = tensor("transpose_182")]; tensor input_113_cast_fp16 = reshape(shape = var_1069, x = var_1068_cast_fp16)[name = tensor("input_113_cast_fp16")]; tensor layers_5_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83212864)))]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_cross_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor input_115_cast_fp16 = add(x = input_109_cast_fp16, y = linear_29_cast_fp16)[name = tensor("input_115_cast_fp16")]; tensor x_55_axes_0 = const()[name = tensor("x_55_axes_0"), val = tensor([-1])]; tensor layers_5_norm_ff_weight_to_fp16 = const()[name = tensor("layers_5_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83409536)))]; tensor x_55_cast_fp16 = layer_norm(axes = x_55_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_ff_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("x_55_cast_fp16")]; tensor input_117_perm_0 = const()[name = tensor("input_117_perm_0"), val = tensor([0, 2, 1])]; tensor input_119_pad_type_0 = const()[name = tensor("input_119_pad_type_0"), val = tensor("valid")]; tensor input_119_strides_0 = const()[name = tensor("input_119_strides_0"), val = tensor([1])]; tensor input_119_pad_0 = const()[name = tensor("input_119_pad_0"), val = tensor([0, 0])]; tensor input_119_dilations_0 = const()[name = tensor("input_119_dilations_0"), val = tensor([1])]; tensor input_119_groups_0 = const()[name = tensor("input_119_groups_0"), val = tensor(1)]; tensor layers_5_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_5_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83411136)))]; tensor input_117_cast_fp16 = transpose(perm = input_117_perm_0, x = x_55_cast_fp16)[name = tensor("transpose_181")]; tensor input_119_cast_fp16 = conv(dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = layers_5_ffn_conv1_weight_to_fp16, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; tensor input_121_mode_0 = const()[name = tensor("input_121_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor("input_121_cast_fp16")]; tensor x_57_pad_type_0 = const()[name = tensor("x_57_pad_type_0"), val = tensor("valid")]; tensor x_57_strides_0 = const()[name = tensor("x_57_strides_0"), val = tensor([1])]; tensor x_57_pad_0 = const()[name = tensor("x_57_pad_0"), val = tensor([0, 0])]; tensor x_57_dilations_0 = const()[name = tensor("x_57_dilations_0"), val = tensor([1])]; tensor x_57_groups_0 = const()[name = tensor("x_57_groups_0"), val = tensor(1)]; tensor layers_5_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_5_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88129792)))]; tensor x_57_cast_fp16 = conv(dilations = x_57_dilations_0, groups = x_57_groups_0, pad = x_57_pad_0, pad_type = x_57_pad_type_0, strides = x_57_strides_0, weight = layers_5_ffn_conv2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("x_57_cast_fp16")]; tensor x_59_perm_0 = const()[name = tensor("x_59_perm_0"), val = tensor([0, 2, 1])]; tensor x_59_cast_fp16 = transpose(perm = x_59_perm_0, x = x_57_cast_fp16)[name = tensor("transpose_180")]; tensor input_123_cast_fp16 = add(x = input_115_cast_fp16, y = x_59_cast_fp16)[name = tensor("input_123_cast_fp16")]; tensor var_1114 = const()[name = tensor("op_1114"), val = tensor(-1)]; tensor x_61_axes_0 = const()[name = tensor("x_61_axes_0"), val = tensor([-1])]; tensor layers_6_norm_sa_weight_to_fp16 = const()[name = tensor("layers_6_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92848448)))]; tensor var_1117_to_fp16 = const()[name = tensor("op_1117_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_61_cast_fp16 = layer_norm(axes = x_61_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_sa_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("x_61_cast_fp16")]; tensor layers_6_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92850048)))]; tensor linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_qkv_proj_weight_to_fp16, x = x_61_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_1136 = const()[name = tensor("op_1136"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_27_cast_fp16 = reshape(shape = var_1136, x = linear_30_cast_fp16)[name = tensor("qkv_27_cast_fp16")]; tensor q_25_begin_0 = const()[name = tensor("q_25_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_25_end_0 = const()[name = tensor("q_25_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_25_end_mask_0 = const()[name = tensor("q_25_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_25_squeeze_mask_0 = const()[name = tensor("q_25_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_25_cast_fp16 = slice_by_index(begin = q_25_begin_0, end = q_25_end_0, end_mask = q_25_end_mask_0, squeeze_mask = q_25_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("q_25_cast_fp16")]; tensor k_25_begin_0 = const()[name = tensor("k_25_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_25_end_0 = const()[name = tensor("k_25_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_25_end_mask_0 = const()[name = tensor("k_25_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_25_squeeze_mask_0 = const()[name = tensor("k_25_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_25_cast_fp16 = slice_by_index(begin = k_25_begin_0, end = k_25_end_0, end_mask = k_25_end_mask_0, squeeze_mask = k_25_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("k_25_cast_fp16")]; tensor v_25_begin_0 = const()[name = tensor("v_25_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_25_end_0 = const()[name = tensor("v_25_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_25_end_mask_0 = const()[name = tensor("v_25_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_25_squeeze_mask_0 = const()[name = tensor("v_25_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_25_cast_fp16 = slice_by_index(begin = v_25_begin_0, end = v_25_end_0, end_mask = v_25_end_mask_0, squeeze_mask = v_25_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("v_25_cast_fp16")]; tensor var_1148_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position6)[name = tensor("op_1148_cast_fp16")]; tensor var_1150 = const()[name = tensor("op_1150"), val = tensor([1, 512, 1, 1])]; tensor var_1149_to_fp16_dtype_0 = const()[name = tensor("op_1149_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1148_cast_fp16_to_fp16 = cast(dtype = var_1149_to_fp16_dtype_0, x = var_1148_cast_fp16)[name = tensor("cast_173")]; tensor mask_13_cast_fp16 = reshape(shape = var_1150, x = var_1148_cast_fp16_to_fp16)[name = tensor("mask_13_cast_fp16")]; tensor k_new_13_reps_0 = const()[name = tensor("k_new_13_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_13_cast_fp16 = tile(reps = k_new_13_reps_0, x = k_25_cast_fp16)[name = tensor("k_new_13_cast_fp16")]; tensor v_new_13_reps_0 = const()[name = tensor("v_new_13_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_13_cast_fp16 = tile(reps = v_new_13_reps_0, x = v_25_cast_fp16)[name = tensor("v_new_13_cast_fp16")]; tensor var_1112_to_fp16 = const()[name = tensor("op_1112_to_fp16"), val = tensor(0x1p+0)]; tensor var_1156_cast_fp16 = sub(x = var_1112_to_fp16, y = mask_13_cast_fp16)[name = tensor("op_1156_cast_fp16")]; tensor var_1157_cast_fp16 = mul(x = cache_k6, y = var_1156_cast_fp16)[name = tensor("op_1157_cast_fp16")]; tensor var_1158_cast_fp16 = mul(x = k_new_13_cast_fp16, y = mask_13_cast_fp16)[name = tensor("op_1158_cast_fp16")]; tensor new_k_13 = add(x = var_1157_cast_fp16, y = var_1158_cast_fp16)[name = tensor("new_k_13_cast_fp16")]; tensor var_1161_cast_fp16 = mul(x = cache_v6, y = var_1156_cast_fp16)[name = tensor("op_1161_cast_fp16")]; tensor var_1162_cast_fp16 = mul(x = v_new_13_cast_fp16, y = mask_13_cast_fp16)[name = tensor("op_1162_cast_fp16")]; tensor new_v_13 = add(x = var_1161_cast_fp16, y = var_1162_cast_fp16)[name = tensor("new_v_13_cast_fp16")]; tensor var_1164_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position6)[name = tensor("op_1164_cast_fp16")]; tensor var_1166 = const()[name = tensor("op_1166"), val = tensor([1, 1, 1, 512])]; tensor var_1165_to_fp16_dtype_0 = const()[name = tensor("op_1165_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1164_cast_fp16_to_fp16 = cast(dtype = var_1165_to_fp16_dtype_0, x = var_1164_cast_fp16)[name = tensor("cast_172")]; tensor var_1167_cast_fp16 = reshape(shape = var_1166, x = var_1164_cast_fp16_to_fp16)[name = tensor("op_1167_cast_fp16")]; tensor var_1171 = const()[name = tensor("op_1171"), val = tensor([0, 2, 1, 3])]; tensor var_1174_transpose_x_0 = const()[name = tensor("op_1174_transpose_x_0"), val = tensor(false)]; tensor var_1174_transpose_y_0 = const()[name = tensor("op_1174_transpose_y_0"), val = tensor(false)]; tensor transpose_96_perm_0 = const()[name = tensor("transpose_96_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_97_perm_0 = const()[name = tensor("transpose_97_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = new_k_13)[name = tensor("transpose_177")]; tensor transpose_96 = transpose(perm = transpose_96_perm_0, x = q_25_cast_fp16)[name = tensor("transpose_178")]; tensor var_1174_cast_fp16 = matmul(transpose_x = var_1174_transpose_x_0, transpose_y = var_1174_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor("op_1174_cast_fp16")]; tensor var_1175_to_fp16 = const()[name = tensor("op_1175_to_fp16"), val = tensor(0x1p-3)]; tensor attn_49_cast_fp16 = mul(x = var_1174_cast_fp16, y = var_1175_to_fp16)[name = tensor("attn_49_cast_fp16")]; tensor var_1177_cast_fp16 = sub(x = var_1112_to_fp16, y = var_1167_cast_fp16)[name = tensor("op_1177_cast_fp16")]; tensor var_1178_to_fp16 = const()[name = tensor("op_1178_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_1179_cast_fp16 = mul(x = var_1177_cast_fp16, y = var_1178_to_fp16)[name = tensor("op_1179_cast_fp16")]; tensor input_125_cast_fp16 = add(x = attn_49_cast_fp16, y = var_1179_cast_fp16)[name = tensor("input_125_cast_fp16")]; tensor attn_51_cast_fp16 = softmax(axis = var_1114, x = input_125_cast_fp16)[name = tensor("attn_51_cast_fp16")]; tensor out_25_transpose_x_0 = const()[name = tensor("out_25_transpose_x_0"), val = tensor(false)]; tensor out_25_transpose_y_0 = const()[name = tensor("out_25_transpose_y_0"), val = tensor(false)]; tensor v4_13_cast_fp16 = transpose(perm = var_1171, x = new_v_13)[name = tensor("transpose_179")]; tensor out_25_cast_fp16 = matmul(transpose_x = out_25_transpose_x_0, transpose_y = out_25_transpose_y_0, x = attn_51_cast_fp16, y = v4_13_cast_fp16)[name = tensor("out_25_cast_fp16")]; tensor var_1183_perm_0 = const()[name = tensor("op_1183_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1184 = const()[name = tensor("op_1184"), val = tensor([1, 1, -1])]; tensor var_1183_cast_fp16 = transpose(perm = var_1183_perm_0, x = out_25_cast_fp16)[name = tensor("transpose_176")]; tensor input_127_cast_fp16 = reshape(shape = var_1184, x = var_1183_cast_fp16)[name = tensor("input_127_cast_fp16")]; tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96389056)))]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_127_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor var_1188_to_fp16 = const()[name = tensor("op_1188_to_fp16"), val = tensor(0x1p+0)]; tensor var_1189 = add(x = position6, y = var_1188_to_fp16)[name = tensor("op_1189_cast_fp16")]; tensor input_129_cast_fp16 = add(x = input_123_cast_fp16, y = linear_31_cast_fp16)[name = tensor("input_129_cast_fp16")]; tensor x_63_axes_0 = const()[name = tensor("x_63_axes_0"), val = tensor([-1])]; tensor layers_6_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_6_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97568768)))]; tensor x_63_cast_fp16 = layer_norm(axes = x_63_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_xa_query_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("x_63_cast_fp16")]; tensor memory_13_axes_0 = const()[name = tensor("memory_13_axes_0"), val = tensor([-1])]; tensor layers_6_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_6_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97570368)))]; tensor memory_13_cast_fp16 = layer_norm(axes = memory_13_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_13_cast_fp16")]; tensor layers_6_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97571968)))]; tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_6_cross_attn_q_proj_weight_to_fp16, x = x_63_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_1210 = const()[name = tensor("op_1210"), val = tensor([1, 1, 1, 128])]; tensor var_1211_cast_fp16 = reshape(shape = var_1210, x = linear_32_cast_fp16)[name = tensor("op_1211_cast_fp16")]; tensor layers_6_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_6_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97768640)))]; tensor linear_33_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_6_cross_attn_kv_proj_weight_to_fp16, x = memory_13_cast_fp16)[name = tensor("linear_33_cast_fp16")]; tensor var_1215 = const()[name = tensor("op_1215"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_13_cast_fp16 = reshape(shape = var_1215, x = linear_33_cast_fp16)[name = tensor("kv_13_cast_fp16")]; tensor var_1219_begin_0 = const()[name = tensor("op_1219_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_1219_end_0 = const()[name = tensor("op_1219_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_1219_end_mask_0 = const()[name = tensor("op_1219_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1219_squeeze_mask_0 = const()[name = tensor("op_1219_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1219_cast_fp16 = slice_by_index(begin = var_1219_begin_0, end = var_1219_end_0, end_mask = var_1219_end_mask_0, squeeze_mask = var_1219_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor("op_1219_cast_fp16")]; tensor var_1223_begin_0 = const()[name = tensor("op_1223_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_1223_end_0 = const()[name = tensor("op_1223_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_1223_end_mask_0 = const()[name = tensor("op_1223_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1223_squeeze_mask_0 = const()[name = tensor("op_1223_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1223_cast_fp16 = slice_by_index(begin = var_1223_begin_0, end = var_1223_end_0, end_mask = var_1223_end_mask_0, squeeze_mask = var_1223_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor("op_1223_cast_fp16")]; tensor v_27_perm_0 = const()[name = tensor("v_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1226_transpose_x_0 = const()[name = tensor("op_1226_transpose_x_0"), val = tensor(false)]; tensor var_1226_transpose_y_0 = const()[name = tensor("op_1226_transpose_y_0"), val = tensor(false)]; tensor transpose_98_perm_0 = const()[name = tensor("transpose_98_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_99_perm_0 = const()[name = tensor("transpose_99_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = var_1219_cast_fp16)[name = tensor("transpose_173")]; tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = var_1211_cast_fp16)[name = tensor("transpose_174")]; tensor var_1226_cast_fp16 = matmul(transpose_x = var_1226_transpose_x_0, transpose_y = var_1226_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor("op_1226_cast_fp16")]; tensor var_1227_to_fp16 = const()[name = tensor("op_1227_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_53_cast_fp16 = mul(x = var_1226_cast_fp16, y = var_1227_to_fp16)[name = tensor("attn_53_cast_fp16")]; tensor input_131_cast_fp16 = add(x = attn_53_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor attn_55_cast_fp16 = softmax(axis = var_1114, x = input_131_cast_fp16)[name = tensor("attn_55_cast_fp16")]; tensor out_27_transpose_x_0 = const()[name = tensor("out_27_transpose_x_0"), val = tensor(false)]; tensor out_27_transpose_y_0 = const()[name = tensor("out_27_transpose_y_0"), val = tensor(false)]; tensor v_27_cast_fp16 = transpose(perm = v_27_perm_0, x = var_1223_cast_fp16)[name = tensor("transpose_175")]; tensor out_27_cast_fp16 = matmul(transpose_x = out_27_transpose_x_0, transpose_y = out_27_transpose_y_0, x = attn_55_cast_fp16, y = v_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; tensor var_1238_perm_0 = const()[name = tensor("op_1238_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1239 = const()[name = tensor("op_1239"), val = tensor([1, 1, -1])]; tensor var_1238_cast_fp16 = transpose(perm = var_1238_perm_0, x = out_27_cast_fp16)[name = tensor("transpose_172")]; tensor input_133_cast_fp16 = reshape(shape = var_1239, x = var_1238_cast_fp16)[name = tensor("input_133_cast_fp16")]; tensor layers_6_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98161920)))]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_cross_attn_o_proj_weight_to_fp16, x = input_133_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_34_cast_fp16)[name = tensor("input_135_cast_fp16")]; tensor x_65_axes_0 = const()[name = tensor("x_65_axes_0"), val = tensor([-1])]; tensor layers_6_norm_ff_weight_to_fp16 = const()[name = tensor("layers_6_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98358592)))]; tensor x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_ff_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("x_65_cast_fp16")]; tensor input_137_perm_0 = const()[name = tensor("input_137_perm_0"), val = tensor([0, 2, 1])]; tensor input_139_pad_type_0 = const()[name = tensor("input_139_pad_type_0"), val = tensor("valid")]; tensor input_139_strides_0 = const()[name = tensor("input_139_strides_0"), val = tensor([1])]; tensor input_139_pad_0 = const()[name = tensor("input_139_pad_0"), val = tensor([0, 0])]; tensor input_139_dilations_0 = const()[name = tensor("input_139_dilations_0"), val = tensor([1])]; tensor input_139_groups_0 = const()[name = tensor("input_139_groups_0"), val = tensor(1)]; tensor layers_6_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_6_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98360192)))]; tensor input_137_cast_fp16 = transpose(perm = input_137_perm_0, x = x_65_cast_fp16)[name = tensor("transpose_171")]; tensor input_139_cast_fp16 = conv(dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = layers_6_ffn_conv1_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("input_139_cast_fp16")]; tensor input_141_mode_0 = const()[name = tensor("input_141_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor("input_141_cast_fp16")]; tensor x_67_pad_type_0 = const()[name = tensor("x_67_pad_type_0"), val = tensor("valid")]; tensor x_67_strides_0 = const()[name = tensor("x_67_strides_0"), val = tensor([1])]; tensor x_67_pad_0 = const()[name = tensor("x_67_pad_0"), val = tensor([0, 0])]; tensor x_67_dilations_0 = const()[name = tensor("x_67_dilations_0"), val = tensor([1])]; tensor x_67_groups_0 = const()[name = tensor("x_67_groups_0"), val = tensor(1)]; tensor layers_6_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_6_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103078848)))]; tensor x_67_cast_fp16 = conv(dilations = x_67_dilations_0, groups = x_67_groups_0, pad = x_67_pad_0, pad_type = x_67_pad_type_0, strides = x_67_strides_0, weight = layers_6_ffn_conv2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("x_67_cast_fp16")]; tensor x_69_perm_0 = const()[name = tensor("x_69_perm_0"), val = tensor([0, 2, 1])]; tensor x_69_cast_fp16 = transpose(perm = x_69_perm_0, x = x_67_cast_fp16)[name = tensor("transpose_170")]; tensor input_143_cast_fp16 = add(x = input_135_cast_fp16, y = x_69_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor var_1284 = const()[name = tensor("op_1284"), val = tensor(-1)]; tensor x_71_axes_0 = const()[name = tensor("x_71_axes_0"), val = tensor([-1])]; tensor layers_7_norm_sa_weight_to_fp16 = const()[name = tensor("layers_7_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107797504)))]; tensor var_1287_to_fp16 = const()[name = tensor("op_1287_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_71_cast_fp16 = layer_norm(axes = x_71_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_sa_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("x_71_cast_fp16")]; tensor layers_7_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107799104)))]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_qkv_proj_weight_to_fp16, x = x_71_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor var_1306 = const()[name = tensor("op_1306"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_31_cast_fp16 = reshape(shape = var_1306, x = linear_35_cast_fp16)[name = tensor("qkv_31_cast_fp16")]; tensor q_29_begin_0 = const()[name = tensor("q_29_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_29_end_0 = const()[name = tensor("q_29_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_29_end_mask_0 = const()[name = tensor("q_29_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_29_squeeze_mask_0 = const()[name = tensor("q_29_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_29_cast_fp16 = slice_by_index(begin = q_29_begin_0, end = q_29_end_0, end_mask = q_29_end_mask_0, squeeze_mask = q_29_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("q_29_cast_fp16")]; tensor k_29_begin_0 = const()[name = tensor("k_29_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_29_end_0 = const()[name = tensor("k_29_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_29_end_mask_0 = const()[name = tensor("k_29_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_29_squeeze_mask_0 = const()[name = tensor("k_29_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_29_cast_fp16 = slice_by_index(begin = k_29_begin_0, end = k_29_end_0, end_mask = k_29_end_mask_0, squeeze_mask = k_29_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("k_29_cast_fp16")]; tensor v_29_begin_0 = const()[name = tensor("v_29_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_29_end_0 = const()[name = tensor("v_29_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_29_end_mask_0 = const()[name = tensor("v_29_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_29_squeeze_mask_0 = const()[name = tensor("v_29_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_29_cast_fp16 = slice_by_index(begin = v_29_begin_0, end = v_29_end_0, end_mask = v_29_end_mask_0, squeeze_mask = v_29_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("v_29_cast_fp16")]; tensor var_1318_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position7)[name = tensor("op_1318_cast_fp16")]; tensor var_1320 = const()[name = tensor("op_1320"), val = tensor([1, 512, 1, 1])]; tensor var_1319_to_fp16_dtype_0 = const()[name = tensor("op_1319_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1318_cast_fp16_to_fp16 = cast(dtype = var_1319_to_fp16_dtype_0, x = var_1318_cast_fp16)[name = tensor("cast_171")]; tensor mask_15_cast_fp16 = reshape(shape = var_1320, x = var_1318_cast_fp16_to_fp16)[name = tensor("mask_15_cast_fp16")]; tensor k_new_15_reps_0 = const()[name = tensor("k_new_15_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_15_cast_fp16 = tile(reps = k_new_15_reps_0, x = k_29_cast_fp16)[name = tensor("k_new_15_cast_fp16")]; tensor v_new_15_reps_0 = const()[name = tensor("v_new_15_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_15_cast_fp16 = tile(reps = v_new_15_reps_0, x = v_29_cast_fp16)[name = tensor("v_new_15_cast_fp16")]; tensor var_1282_to_fp16 = const()[name = tensor("op_1282_to_fp16"), val = tensor(0x1p+0)]; tensor var_1326_cast_fp16 = sub(x = var_1282_to_fp16, y = mask_15_cast_fp16)[name = tensor("op_1326_cast_fp16")]; tensor var_1327_cast_fp16 = mul(x = cache_k7, y = var_1326_cast_fp16)[name = tensor("op_1327_cast_fp16")]; tensor var_1328_cast_fp16 = mul(x = k_new_15_cast_fp16, y = mask_15_cast_fp16)[name = tensor("op_1328_cast_fp16")]; tensor new_k_15 = add(x = var_1327_cast_fp16, y = var_1328_cast_fp16)[name = tensor("new_k_15_cast_fp16")]; tensor var_1331_cast_fp16 = mul(x = cache_v7, y = var_1326_cast_fp16)[name = tensor("op_1331_cast_fp16")]; tensor var_1332_cast_fp16 = mul(x = v_new_15_cast_fp16, y = mask_15_cast_fp16)[name = tensor("op_1332_cast_fp16")]; tensor new_v_15 = add(x = var_1331_cast_fp16, y = var_1332_cast_fp16)[name = tensor("new_v_15_cast_fp16")]; tensor var_1334_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position7)[name = tensor("op_1334_cast_fp16")]; tensor var_1336 = const()[name = tensor("op_1336"), val = tensor([1, 1, 1, 512])]; tensor var_1335_to_fp16_dtype_0 = const()[name = tensor("op_1335_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1334_cast_fp16_to_fp16 = cast(dtype = var_1335_to_fp16_dtype_0, x = var_1334_cast_fp16)[name = tensor("cast_170")]; tensor var_1337_cast_fp16 = reshape(shape = var_1336, x = var_1334_cast_fp16_to_fp16)[name = tensor("op_1337_cast_fp16")]; tensor var_1341 = const()[name = tensor("op_1341"), val = tensor([0, 2, 1, 3])]; tensor var_1344_transpose_x_0 = const()[name = tensor("op_1344_transpose_x_0"), val = tensor(false)]; tensor var_1344_transpose_y_0 = const()[name = tensor("op_1344_transpose_y_0"), val = tensor(false)]; tensor transpose_100_perm_0 = const()[name = tensor("transpose_100_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_101_perm_0 = const()[name = tensor("transpose_101_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = new_k_15)[name = tensor("transpose_167")]; tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = q_29_cast_fp16)[name = tensor("transpose_168")]; tensor var_1344_cast_fp16 = matmul(transpose_x = var_1344_transpose_x_0, transpose_y = var_1344_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor("op_1344_cast_fp16")]; tensor var_1345_to_fp16 = const()[name = tensor("op_1345_to_fp16"), val = tensor(0x1p-3)]; tensor attn_57_cast_fp16 = mul(x = var_1344_cast_fp16, y = var_1345_to_fp16)[name = tensor("attn_57_cast_fp16")]; tensor var_1347_cast_fp16 = sub(x = var_1282_to_fp16, y = var_1337_cast_fp16)[name = tensor("op_1347_cast_fp16")]; tensor var_1348_to_fp16 = const()[name = tensor("op_1348_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_1349_cast_fp16 = mul(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor("op_1349_cast_fp16")]; tensor input_145_cast_fp16 = add(x = attn_57_cast_fp16, y = var_1349_cast_fp16)[name = tensor("input_145_cast_fp16")]; tensor attn_59_cast_fp16 = softmax(axis = var_1284, x = input_145_cast_fp16)[name = tensor("attn_59_cast_fp16")]; tensor out_29_transpose_x_0 = const()[name = tensor("out_29_transpose_x_0"), val = tensor(false)]; tensor out_29_transpose_y_0 = const()[name = tensor("out_29_transpose_y_0"), val = tensor(false)]; tensor v4_15_cast_fp16 = transpose(perm = var_1341, x = new_v_15)[name = tensor("transpose_169")]; tensor out_29_cast_fp16 = matmul(transpose_x = out_29_transpose_x_0, transpose_y = out_29_transpose_y_0, x = attn_59_cast_fp16, y = v4_15_cast_fp16)[name = tensor("out_29_cast_fp16")]; tensor var_1353_perm_0 = const()[name = tensor("op_1353_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1354 = const()[name = tensor("op_1354"), val = tensor([1, 1, -1])]; tensor var_1353_cast_fp16 = transpose(perm = var_1353_perm_0, x = out_29_cast_fp16)[name = tensor("transpose_166")]; tensor input_147_cast_fp16 = reshape(shape = var_1354, x = var_1353_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111338112)))]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_147_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor var_1358_to_fp16 = const()[name = tensor("op_1358_to_fp16"), val = tensor(0x1p+0)]; tensor var_1359 = add(x = position7, y = var_1358_to_fp16)[name = tensor("op_1359_cast_fp16")]; tensor input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_36_cast_fp16)[name = tensor("input_149_cast_fp16")]; tensor x_73_axes_0 = const()[name = tensor("x_73_axes_0"), val = tensor([-1])]; tensor layers_7_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_7_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112517824)))]; tensor x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_xa_query_weight_to_fp16, x = input_149_cast_fp16)[name = tensor("x_73_cast_fp16")]; tensor memory_15_axes_0 = const()[name = tensor("memory_15_axes_0"), val = tensor([-1])]; tensor layers_7_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_7_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112519424)))]; tensor memory_15_cast_fp16 = layer_norm(axes = memory_15_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_15_cast_fp16")]; tensor layers_7_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112521024)))]; tensor linear_37_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_7_cross_attn_q_proj_weight_to_fp16, x = x_73_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor var_1380 = const()[name = tensor("op_1380"), val = tensor([1, 1, 1, 128])]; tensor var_1381_cast_fp16 = reshape(shape = var_1380, x = linear_37_cast_fp16)[name = tensor("op_1381_cast_fp16")]; tensor layers_7_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_7_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112717696)))]; tensor linear_38_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_7_cross_attn_kv_proj_weight_to_fp16, x = memory_15_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_1385 = const()[name = tensor("op_1385"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_15_cast_fp16 = reshape(shape = var_1385, x = linear_38_cast_fp16)[name = tensor("kv_15_cast_fp16")]; tensor var_1389_begin_0 = const()[name = tensor("op_1389_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_1389_end_0 = const()[name = tensor("op_1389_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_1389_end_mask_0 = const()[name = tensor("op_1389_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1389_squeeze_mask_0 = const()[name = tensor("op_1389_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1389_cast_fp16 = slice_by_index(begin = var_1389_begin_0, end = var_1389_end_0, end_mask = var_1389_end_mask_0, squeeze_mask = var_1389_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor("op_1389_cast_fp16")]; tensor var_1393_begin_0 = const()[name = tensor("op_1393_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_1393_end_0 = const()[name = tensor("op_1393_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_1393_end_mask_0 = const()[name = tensor("op_1393_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1393_squeeze_mask_0 = const()[name = tensor("op_1393_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1393_cast_fp16 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, squeeze_mask = var_1393_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor("op_1393_cast_fp16")]; tensor v_31_perm_0 = const()[name = tensor("v_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1396_transpose_x_0 = const()[name = tensor("op_1396_transpose_x_0"), val = tensor(false)]; tensor var_1396_transpose_y_0 = const()[name = tensor("op_1396_transpose_y_0"), val = tensor(false)]; tensor transpose_102_perm_0 = const()[name = tensor("transpose_102_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_103_perm_0 = const()[name = tensor("transpose_103_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = var_1389_cast_fp16)[name = tensor("transpose_163")]; tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = var_1381_cast_fp16)[name = tensor("transpose_164")]; tensor var_1396_cast_fp16 = matmul(transpose_x = var_1396_transpose_x_0, transpose_y = var_1396_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor("op_1396_cast_fp16")]; tensor var_1397_to_fp16 = const()[name = tensor("op_1397_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_61_cast_fp16 = mul(x = var_1396_cast_fp16, y = var_1397_to_fp16)[name = tensor("attn_61_cast_fp16")]; tensor input_151_cast_fp16 = add(x = attn_61_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor attn_63_cast_fp16 = softmax(axis = var_1284, x = input_151_cast_fp16)[name = tensor("attn_63_cast_fp16")]; tensor out_31_transpose_x_0 = const()[name = tensor("out_31_transpose_x_0"), val = tensor(false)]; tensor out_31_transpose_y_0 = const()[name = tensor("out_31_transpose_y_0"), val = tensor(false)]; tensor v_31_cast_fp16 = transpose(perm = v_31_perm_0, x = var_1393_cast_fp16)[name = tensor("transpose_165")]; tensor out_31_cast_fp16 = matmul(transpose_x = out_31_transpose_x_0, transpose_y = out_31_transpose_y_0, x = attn_63_cast_fp16, y = v_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; tensor var_1408_perm_0 = const()[name = tensor("op_1408_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1409 = const()[name = tensor("op_1409"), val = tensor([1, 1, -1])]; tensor var_1408_cast_fp16 = transpose(perm = var_1408_perm_0, x = out_31_cast_fp16)[name = tensor("transpose_162")]; tensor input_153_cast_fp16 = reshape(shape = var_1409, x = var_1408_cast_fp16)[name = tensor("input_153_cast_fp16")]; tensor layers_7_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113110976)))]; tensor linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_cross_attn_o_proj_weight_to_fp16, x = input_153_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_39_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor x_75_axes_0 = const()[name = tensor("x_75_axes_0"), val = tensor([-1])]; tensor layers_7_norm_ff_weight_to_fp16 = const()[name = tensor("layers_7_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113307648)))]; tensor x_75_cast_fp16 = layer_norm(axes = x_75_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_ff_weight_to_fp16, x = input_155_cast_fp16)[name = tensor("x_75_cast_fp16")]; tensor input_157_perm_0 = const()[name = tensor("input_157_perm_0"), val = tensor([0, 2, 1])]; tensor input_159_pad_type_0 = const()[name = tensor("input_159_pad_type_0"), val = tensor("valid")]; tensor input_159_strides_0 = const()[name = tensor("input_159_strides_0"), val = tensor([1])]; tensor input_159_pad_0 = const()[name = tensor("input_159_pad_0"), val = tensor([0, 0])]; tensor input_159_dilations_0 = const()[name = tensor("input_159_dilations_0"), val = tensor([1])]; tensor input_159_groups_0 = const()[name = tensor("input_159_groups_0"), val = tensor(1)]; tensor layers_7_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_7_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113309248)))]; tensor input_157_cast_fp16 = transpose(perm = input_157_perm_0, x = x_75_cast_fp16)[name = tensor("transpose_161")]; tensor input_159_cast_fp16 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = layers_7_ffn_conv1_weight_to_fp16, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; tensor input_161_mode_0 = const()[name = tensor("input_161_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor("input_161_cast_fp16")]; tensor x_77_pad_type_0 = const()[name = tensor("x_77_pad_type_0"), val = tensor("valid")]; tensor x_77_strides_0 = const()[name = tensor("x_77_strides_0"), val = tensor([1])]; tensor x_77_pad_0 = const()[name = tensor("x_77_pad_0"), val = tensor([0, 0])]; tensor x_77_dilations_0 = const()[name = tensor("x_77_dilations_0"), val = tensor([1])]; tensor x_77_groups_0 = const()[name = tensor("x_77_groups_0"), val = tensor(1)]; tensor layers_7_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_7_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118027904)))]; tensor x_77_cast_fp16 = conv(dilations = x_77_dilations_0, groups = x_77_groups_0, pad = x_77_pad_0, pad_type = x_77_pad_type_0, strides = x_77_strides_0, weight = layers_7_ffn_conv2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("x_77_cast_fp16")]; tensor x_79_perm_0 = const()[name = tensor("x_79_perm_0"), val = tensor([0, 2, 1])]; tensor x_79_cast_fp16 = transpose(perm = x_79_perm_0, x = x_77_cast_fp16)[name = tensor("transpose_160")]; tensor input_163_cast_fp16 = add(x = input_155_cast_fp16, y = x_79_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor var_1454 = const()[name = tensor("op_1454"), val = tensor(-1)]; tensor x_81_axes_0 = const()[name = tensor("x_81_axes_0"), val = tensor([-1])]; tensor layers_8_norm_sa_weight_to_fp16 = const()[name = tensor("layers_8_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122746560)))]; tensor var_1457_to_fp16 = const()[name = tensor("op_1457_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_sa_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("x_81_cast_fp16")]; tensor layers_8_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122748160)))]; tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_qkv_proj_weight_to_fp16, x = x_81_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor var_1476 = const()[name = tensor("op_1476"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_35_cast_fp16 = reshape(shape = var_1476, x = linear_40_cast_fp16)[name = tensor("qkv_35_cast_fp16")]; tensor q_33_begin_0 = const()[name = tensor("q_33_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_33_end_0 = const()[name = tensor("q_33_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_33_end_mask_0 = const()[name = tensor("q_33_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_33_squeeze_mask_0 = const()[name = tensor("q_33_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_33_cast_fp16 = slice_by_index(begin = q_33_begin_0, end = q_33_end_0, end_mask = q_33_end_mask_0, squeeze_mask = q_33_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("q_33_cast_fp16")]; tensor k_33_begin_0 = const()[name = tensor("k_33_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_33_end_0 = const()[name = tensor("k_33_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_33_end_mask_0 = const()[name = tensor("k_33_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_33_squeeze_mask_0 = const()[name = tensor("k_33_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_33_cast_fp16 = slice_by_index(begin = k_33_begin_0, end = k_33_end_0, end_mask = k_33_end_mask_0, squeeze_mask = k_33_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("k_33_cast_fp16")]; tensor v_33_begin_0 = const()[name = tensor("v_33_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_33_end_0 = const()[name = tensor("v_33_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_33_end_mask_0 = const()[name = tensor("v_33_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_33_squeeze_mask_0 = const()[name = tensor("v_33_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_33_cast_fp16 = slice_by_index(begin = v_33_begin_0, end = v_33_end_0, end_mask = v_33_end_mask_0, squeeze_mask = v_33_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("v_33_cast_fp16")]; tensor var_1488_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position8)[name = tensor("op_1488_cast_fp16")]; tensor var_1490 = const()[name = tensor("op_1490"), val = tensor([1, 512, 1, 1])]; tensor var_1489_to_fp16_dtype_0 = const()[name = tensor("op_1489_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1488_cast_fp16_to_fp16 = cast(dtype = var_1489_to_fp16_dtype_0, x = var_1488_cast_fp16)[name = tensor("cast_169")]; tensor mask_17_cast_fp16 = reshape(shape = var_1490, x = var_1488_cast_fp16_to_fp16)[name = tensor("mask_17_cast_fp16")]; tensor k_new_17_reps_0 = const()[name = tensor("k_new_17_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_17_cast_fp16 = tile(reps = k_new_17_reps_0, x = k_33_cast_fp16)[name = tensor("k_new_17_cast_fp16")]; tensor v_new_17_reps_0 = const()[name = tensor("v_new_17_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_17_cast_fp16 = tile(reps = v_new_17_reps_0, x = v_33_cast_fp16)[name = tensor("v_new_17_cast_fp16")]; tensor var_1452_to_fp16 = const()[name = tensor("op_1452_to_fp16"), val = tensor(0x1p+0)]; tensor var_1496_cast_fp16 = sub(x = var_1452_to_fp16, y = mask_17_cast_fp16)[name = tensor("op_1496_cast_fp16")]; tensor var_1497_cast_fp16 = mul(x = cache_k8, y = var_1496_cast_fp16)[name = tensor("op_1497_cast_fp16")]; tensor var_1498_cast_fp16 = mul(x = k_new_17_cast_fp16, y = mask_17_cast_fp16)[name = tensor("op_1498_cast_fp16")]; tensor new_k_17 = add(x = var_1497_cast_fp16, y = var_1498_cast_fp16)[name = tensor("new_k_17_cast_fp16")]; tensor var_1501_cast_fp16 = mul(x = cache_v8, y = var_1496_cast_fp16)[name = tensor("op_1501_cast_fp16")]; tensor var_1502_cast_fp16 = mul(x = v_new_17_cast_fp16, y = mask_17_cast_fp16)[name = tensor("op_1502_cast_fp16")]; tensor new_v_17 = add(x = var_1501_cast_fp16, y = var_1502_cast_fp16)[name = tensor("new_v_17_cast_fp16")]; tensor var_1504_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position8)[name = tensor("op_1504_cast_fp16")]; tensor var_1506 = const()[name = tensor("op_1506"), val = tensor([1, 1, 1, 512])]; tensor var_1505_to_fp16_dtype_0 = const()[name = tensor("op_1505_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1504_cast_fp16_to_fp16 = cast(dtype = var_1505_to_fp16_dtype_0, x = var_1504_cast_fp16)[name = tensor("cast_168")]; tensor var_1507_cast_fp16 = reshape(shape = var_1506, x = var_1504_cast_fp16_to_fp16)[name = tensor("op_1507_cast_fp16")]; tensor var_1511 = const()[name = tensor("op_1511"), val = tensor([0, 2, 1, 3])]; tensor var_1514_transpose_x_0 = const()[name = tensor("op_1514_transpose_x_0"), val = tensor(false)]; tensor var_1514_transpose_y_0 = const()[name = tensor("op_1514_transpose_y_0"), val = tensor(false)]; tensor transpose_104_perm_0 = const()[name = tensor("transpose_104_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_105_perm_0 = const()[name = tensor("transpose_105_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = new_k_17)[name = tensor("transpose_157")]; tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = q_33_cast_fp16)[name = tensor("transpose_158")]; tensor var_1514_cast_fp16 = matmul(transpose_x = var_1514_transpose_x_0, transpose_y = var_1514_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor("op_1514_cast_fp16")]; tensor var_1515_to_fp16 = const()[name = tensor("op_1515_to_fp16"), val = tensor(0x1p-3)]; tensor attn_65_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor("attn_65_cast_fp16")]; tensor var_1517_cast_fp16 = sub(x = var_1452_to_fp16, y = var_1507_cast_fp16)[name = tensor("op_1517_cast_fp16")]; tensor var_1518_to_fp16 = const()[name = tensor("op_1518_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_1519_cast_fp16 = mul(x = var_1517_cast_fp16, y = var_1518_to_fp16)[name = tensor("op_1519_cast_fp16")]; tensor input_165_cast_fp16 = add(x = attn_65_cast_fp16, y = var_1519_cast_fp16)[name = tensor("input_165_cast_fp16")]; tensor attn_67_cast_fp16 = softmax(axis = var_1454, x = input_165_cast_fp16)[name = tensor("attn_67_cast_fp16")]; tensor out_33_transpose_x_0 = const()[name = tensor("out_33_transpose_x_0"), val = tensor(false)]; tensor out_33_transpose_y_0 = const()[name = tensor("out_33_transpose_y_0"), val = tensor(false)]; tensor v4_17_cast_fp16 = transpose(perm = var_1511, x = new_v_17)[name = tensor("transpose_159")]; tensor out_33_cast_fp16 = matmul(transpose_x = out_33_transpose_x_0, transpose_y = out_33_transpose_y_0, x = attn_67_cast_fp16, y = v4_17_cast_fp16)[name = tensor("out_33_cast_fp16")]; tensor var_1523_perm_0 = const()[name = tensor("op_1523_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1524 = const()[name = tensor("op_1524"), val = tensor([1, 1, -1])]; tensor var_1523_cast_fp16 = transpose(perm = var_1523_perm_0, x = out_33_cast_fp16)[name = tensor("transpose_156")]; tensor input_167_cast_fp16 = reshape(shape = var_1524, x = var_1523_cast_fp16)[name = tensor("input_167_cast_fp16")]; tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126287168)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_167_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor var_1528_to_fp16 = const()[name = tensor("op_1528_to_fp16"), val = tensor(0x1p+0)]; tensor var_1529 = add(x = position8, y = var_1528_to_fp16)[name = tensor("op_1529_cast_fp16")]; tensor input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_41_cast_fp16)[name = tensor("input_169_cast_fp16")]; tensor x_83_axes_0 = const()[name = tensor("x_83_axes_0"), val = tensor([-1])]; tensor layers_8_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_8_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127466880)))]; tensor x_83_cast_fp16 = layer_norm(axes = x_83_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_xa_query_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("x_83_cast_fp16")]; tensor memory_17_axes_0 = const()[name = tensor("memory_17_axes_0"), val = tensor([-1])]; tensor layers_8_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_8_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127468480)))]; tensor memory_17_cast_fp16 = layer_norm(axes = memory_17_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_17_cast_fp16")]; tensor layers_8_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127470080)))]; tensor linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_8_cross_attn_q_proj_weight_to_fp16, x = x_83_cast_fp16)[name = tensor("linear_42_cast_fp16")]; tensor var_1550 = const()[name = tensor("op_1550"), val = tensor([1, 1, 1, 128])]; tensor var_1551_cast_fp16 = reshape(shape = var_1550, x = linear_42_cast_fp16)[name = tensor("op_1551_cast_fp16")]; tensor layers_8_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_8_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127666752)))]; tensor linear_43_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_8_cross_attn_kv_proj_weight_to_fp16, x = memory_17_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor var_1555 = const()[name = tensor("op_1555"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_17_cast_fp16 = reshape(shape = var_1555, x = linear_43_cast_fp16)[name = tensor("kv_17_cast_fp16")]; tensor var_1559_begin_0 = const()[name = tensor("op_1559_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_1559_end_0 = const()[name = tensor("op_1559_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_1559_end_mask_0 = const()[name = tensor("op_1559_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1559_squeeze_mask_0 = const()[name = tensor("op_1559_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, squeeze_mask = var_1559_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor("op_1559_cast_fp16")]; tensor var_1563_begin_0 = const()[name = tensor("op_1563_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_1563_end_0 = const()[name = tensor("op_1563_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_1563_end_mask_0 = const()[name = tensor("op_1563_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1563_squeeze_mask_0 = const()[name = tensor("op_1563_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1563_cast_fp16 = slice_by_index(begin = var_1563_begin_0, end = var_1563_end_0, end_mask = var_1563_end_mask_0, squeeze_mask = var_1563_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor("op_1563_cast_fp16")]; tensor v_35_perm_0 = const()[name = tensor("v_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1566_transpose_x_0 = const()[name = tensor("op_1566_transpose_x_0"), val = tensor(false)]; tensor var_1566_transpose_y_0 = const()[name = tensor("op_1566_transpose_y_0"), val = tensor(false)]; tensor transpose_106_perm_0 = const()[name = tensor("transpose_106_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_107_perm_0 = const()[name = tensor("transpose_107_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = var_1559_cast_fp16)[name = tensor("transpose_153")]; tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = var_1551_cast_fp16)[name = tensor("transpose_154")]; tensor var_1566_cast_fp16 = matmul(transpose_x = var_1566_transpose_x_0, transpose_y = var_1566_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor("op_1566_cast_fp16")]; tensor var_1567_to_fp16 = const()[name = tensor("op_1567_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_69_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor("attn_69_cast_fp16")]; tensor input_171_cast_fp16 = add(x = attn_69_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_171_cast_fp16")]; tensor attn_71_cast_fp16 = softmax(axis = var_1454, x = input_171_cast_fp16)[name = tensor("attn_71_cast_fp16")]; tensor out_35_transpose_x_0 = const()[name = tensor("out_35_transpose_x_0"), val = tensor(false)]; tensor out_35_transpose_y_0 = const()[name = tensor("out_35_transpose_y_0"), val = tensor(false)]; tensor v_35_cast_fp16 = transpose(perm = v_35_perm_0, x = var_1563_cast_fp16)[name = tensor("transpose_155")]; tensor out_35_cast_fp16 = matmul(transpose_x = out_35_transpose_x_0, transpose_y = out_35_transpose_y_0, x = attn_71_cast_fp16, y = v_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; tensor var_1578_perm_0 = const()[name = tensor("op_1578_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1579 = const()[name = tensor("op_1579"), val = tensor([1, 1, -1])]; tensor var_1578_cast_fp16 = transpose(perm = var_1578_perm_0, x = out_35_cast_fp16)[name = tensor("transpose_152")]; tensor input_173_cast_fp16 = reshape(shape = var_1579, x = var_1578_cast_fp16)[name = tensor("input_173_cast_fp16")]; tensor layers_8_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128060032)))]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_cross_attn_o_proj_weight_to_fp16, x = input_173_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor input_175_cast_fp16 = add(x = input_169_cast_fp16, y = linear_44_cast_fp16)[name = tensor("input_175_cast_fp16")]; tensor x_85_axes_0 = const()[name = tensor("x_85_axes_0"), val = tensor([-1])]; tensor layers_8_norm_ff_weight_to_fp16 = const()[name = tensor("layers_8_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128256704)))]; tensor x_85_cast_fp16 = layer_norm(axes = x_85_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_ff_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("x_85_cast_fp16")]; tensor input_177_perm_0 = const()[name = tensor("input_177_perm_0"), val = tensor([0, 2, 1])]; tensor input_179_pad_type_0 = const()[name = tensor("input_179_pad_type_0"), val = tensor("valid")]; tensor input_179_strides_0 = const()[name = tensor("input_179_strides_0"), val = tensor([1])]; tensor input_179_pad_0 = const()[name = tensor("input_179_pad_0"), val = tensor([0, 0])]; tensor input_179_dilations_0 = const()[name = tensor("input_179_dilations_0"), val = tensor([1])]; tensor input_179_groups_0 = const()[name = tensor("input_179_groups_0"), val = tensor(1)]; tensor layers_8_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_8_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128258304)))]; tensor input_177_cast_fp16 = transpose(perm = input_177_perm_0, x = x_85_cast_fp16)[name = tensor("transpose_151")]; tensor input_179_cast_fp16 = conv(dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = layers_8_ffn_conv1_weight_to_fp16, x = input_177_cast_fp16)[name = tensor("input_179_cast_fp16")]; tensor input_181_mode_0 = const()[name = tensor("input_181_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor("input_181_cast_fp16")]; tensor x_87_pad_type_0 = const()[name = tensor("x_87_pad_type_0"), val = tensor("valid")]; tensor x_87_strides_0 = const()[name = tensor("x_87_strides_0"), val = tensor([1])]; tensor x_87_pad_0 = const()[name = tensor("x_87_pad_0"), val = tensor([0, 0])]; tensor x_87_dilations_0 = const()[name = tensor("x_87_dilations_0"), val = tensor([1])]; tensor x_87_groups_0 = const()[name = tensor("x_87_groups_0"), val = tensor(1)]; tensor layers_8_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_8_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132976960)))]; tensor x_87_cast_fp16 = conv(dilations = x_87_dilations_0, groups = x_87_groups_0, pad = x_87_pad_0, pad_type = x_87_pad_type_0, strides = x_87_strides_0, weight = layers_8_ffn_conv2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor("x_87_cast_fp16")]; tensor x_89_perm_0 = const()[name = tensor("x_89_perm_0"), val = tensor([0, 2, 1])]; tensor x_89_cast_fp16 = transpose(perm = x_89_perm_0, x = x_87_cast_fp16)[name = tensor("transpose_150")]; tensor input_183_cast_fp16 = add(x = input_175_cast_fp16, y = x_89_cast_fp16)[name = tensor("input_183_cast_fp16")]; tensor var_1624 = const()[name = tensor("op_1624"), val = tensor(-1)]; tensor x_91_axes_0 = const()[name = tensor("x_91_axes_0"), val = tensor([-1])]; tensor layers_9_norm_sa_weight_to_fp16 = const()[name = tensor("layers_9_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137695616)))]; tensor var_1627_to_fp16 = const()[name = tensor("op_1627_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_91_cast_fp16 = layer_norm(axes = x_91_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_sa_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("x_91_cast_fp16")]; tensor layers_9_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137697216)))]; tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_qkv_proj_weight_to_fp16, x = x_91_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor var_1646 = const()[name = tensor("op_1646"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_39_cast_fp16 = reshape(shape = var_1646, x = linear_45_cast_fp16)[name = tensor("qkv_39_cast_fp16")]; tensor q_37_begin_0 = const()[name = tensor("q_37_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_37_end_0 = const()[name = tensor("q_37_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_37_end_mask_0 = const()[name = tensor("q_37_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_37_squeeze_mask_0 = const()[name = tensor("q_37_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_37_cast_fp16 = slice_by_index(begin = q_37_begin_0, end = q_37_end_0, end_mask = q_37_end_mask_0, squeeze_mask = q_37_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("q_37_cast_fp16")]; tensor k_37_begin_0 = const()[name = tensor("k_37_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_37_end_0 = const()[name = tensor("k_37_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_37_end_mask_0 = const()[name = tensor("k_37_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_37_squeeze_mask_0 = const()[name = tensor("k_37_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_37_cast_fp16 = slice_by_index(begin = k_37_begin_0, end = k_37_end_0, end_mask = k_37_end_mask_0, squeeze_mask = k_37_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("k_37_cast_fp16")]; tensor v_37_begin_0 = const()[name = tensor("v_37_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_37_end_0 = const()[name = tensor("v_37_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_37_end_mask_0 = const()[name = tensor("v_37_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_37_squeeze_mask_0 = const()[name = tensor("v_37_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_37_cast_fp16 = slice_by_index(begin = v_37_begin_0, end = v_37_end_0, end_mask = v_37_end_mask_0, squeeze_mask = v_37_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("v_37_cast_fp16")]; tensor var_1658_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position9)[name = tensor("op_1658_cast_fp16")]; tensor var_1660 = const()[name = tensor("op_1660"), val = tensor([1, 512, 1, 1])]; tensor var_1659_to_fp16_dtype_0 = const()[name = tensor("op_1659_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1658_cast_fp16_to_fp16 = cast(dtype = var_1659_to_fp16_dtype_0, x = var_1658_cast_fp16)[name = tensor("cast_167")]; tensor mask_19_cast_fp16 = reshape(shape = var_1660, x = var_1658_cast_fp16_to_fp16)[name = tensor("mask_19_cast_fp16")]; tensor k_new_19_reps_0 = const()[name = tensor("k_new_19_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_19_cast_fp16 = tile(reps = k_new_19_reps_0, x = k_37_cast_fp16)[name = tensor("k_new_19_cast_fp16")]; tensor v_new_19_reps_0 = const()[name = tensor("v_new_19_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_19_cast_fp16 = tile(reps = v_new_19_reps_0, x = v_37_cast_fp16)[name = tensor("v_new_19_cast_fp16")]; tensor var_1622_to_fp16 = const()[name = tensor("op_1622_to_fp16"), val = tensor(0x1p+0)]; tensor var_1666_cast_fp16 = sub(x = var_1622_to_fp16, y = mask_19_cast_fp16)[name = tensor("op_1666_cast_fp16")]; tensor var_1667_cast_fp16 = mul(x = cache_k9, y = var_1666_cast_fp16)[name = tensor("op_1667_cast_fp16")]; tensor var_1668_cast_fp16 = mul(x = k_new_19_cast_fp16, y = mask_19_cast_fp16)[name = tensor("op_1668_cast_fp16")]; tensor new_k_19 = add(x = var_1667_cast_fp16, y = var_1668_cast_fp16)[name = tensor("new_k_19_cast_fp16")]; tensor var_1671_cast_fp16 = mul(x = cache_v9, y = var_1666_cast_fp16)[name = tensor("op_1671_cast_fp16")]; tensor var_1672_cast_fp16 = mul(x = v_new_19_cast_fp16, y = mask_19_cast_fp16)[name = tensor("op_1672_cast_fp16")]; tensor new_v_19 = add(x = var_1671_cast_fp16, y = var_1672_cast_fp16)[name = tensor("new_v_19_cast_fp16")]; tensor var_1674_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position9)[name = tensor("op_1674_cast_fp16")]; tensor var_1676 = const()[name = tensor("op_1676"), val = tensor([1, 1, 1, 512])]; tensor var_1675_to_fp16_dtype_0 = const()[name = tensor("op_1675_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1674_cast_fp16_to_fp16 = cast(dtype = var_1675_to_fp16_dtype_0, x = var_1674_cast_fp16)[name = tensor("cast_166")]; tensor var_1677_cast_fp16 = reshape(shape = var_1676, x = var_1674_cast_fp16_to_fp16)[name = tensor("op_1677_cast_fp16")]; tensor var_1681 = const()[name = tensor("op_1681"), val = tensor([0, 2, 1, 3])]; tensor var_1684_transpose_x_0 = const()[name = tensor("op_1684_transpose_x_0"), val = tensor(false)]; tensor var_1684_transpose_y_0 = const()[name = tensor("op_1684_transpose_y_0"), val = tensor(false)]; tensor transpose_108_perm_0 = const()[name = tensor("transpose_108_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_109_perm_0 = const()[name = tensor("transpose_109_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = new_k_19)[name = tensor("transpose_147")]; tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = q_37_cast_fp16)[name = tensor("transpose_148")]; tensor var_1684_cast_fp16 = matmul(transpose_x = var_1684_transpose_x_0, transpose_y = var_1684_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor("op_1684_cast_fp16")]; tensor var_1685_to_fp16 = const()[name = tensor("op_1685_to_fp16"), val = tensor(0x1p-3)]; tensor attn_73_cast_fp16 = mul(x = var_1684_cast_fp16, y = var_1685_to_fp16)[name = tensor("attn_73_cast_fp16")]; tensor var_1687_cast_fp16 = sub(x = var_1622_to_fp16, y = var_1677_cast_fp16)[name = tensor("op_1687_cast_fp16")]; tensor var_1688_to_fp16 = const()[name = tensor("op_1688_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_1689_cast_fp16 = mul(x = var_1687_cast_fp16, y = var_1688_to_fp16)[name = tensor("op_1689_cast_fp16")]; tensor input_185_cast_fp16 = add(x = attn_73_cast_fp16, y = var_1689_cast_fp16)[name = tensor("input_185_cast_fp16")]; tensor attn_75_cast_fp16 = softmax(axis = var_1624, x = input_185_cast_fp16)[name = tensor("attn_75_cast_fp16")]; tensor out_37_transpose_x_0 = const()[name = tensor("out_37_transpose_x_0"), val = tensor(false)]; tensor out_37_transpose_y_0 = const()[name = tensor("out_37_transpose_y_0"), val = tensor(false)]; tensor v4_19_cast_fp16 = transpose(perm = var_1681, x = new_v_19)[name = tensor("transpose_149")]; tensor out_37_cast_fp16 = matmul(transpose_x = out_37_transpose_x_0, transpose_y = out_37_transpose_y_0, x = attn_75_cast_fp16, y = v4_19_cast_fp16)[name = tensor("out_37_cast_fp16")]; tensor var_1693_perm_0 = const()[name = tensor("op_1693_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1694 = const()[name = tensor("op_1694"), val = tensor([1, 1, -1])]; tensor var_1693_cast_fp16 = transpose(perm = var_1693_perm_0, x = out_37_cast_fp16)[name = tensor("transpose_146")]; tensor input_187_cast_fp16 = reshape(shape = var_1694, x = var_1693_cast_fp16)[name = tensor("input_187_cast_fp16")]; tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141236224)))]; tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_187_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor var_1698_to_fp16 = const()[name = tensor("op_1698_to_fp16"), val = tensor(0x1p+0)]; tensor var_1699 = add(x = position9, y = var_1698_to_fp16)[name = tensor("op_1699_cast_fp16")]; tensor input_189_cast_fp16 = add(x = input_183_cast_fp16, y = linear_46_cast_fp16)[name = tensor("input_189_cast_fp16")]; tensor x_93_axes_0 = const()[name = tensor("x_93_axes_0"), val = tensor([-1])]; tensor layers_9_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_9_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142415936)))]; tensor x_93_cast_fp16 = layer_norm(axes = x_93_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_xa_query_weight_to_fp16, x = input_189_cast_fp16)[name = tensor("x_93_cast_fp16")]; tensor memory_19_axes_0 = const()[name = tensor("memory_19_axes_0"), val = tensor([-1])]; tensor layers_9_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_9_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142417536)))]; tensor memory_19_cast_fp16 = layer_norm(axes = memory_19_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_19_cast_fp16")]; tensor layers_9_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142419136)))]; tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_9_cross_attn_q_proj_weight_to_fp16, x = x_93_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor var_1720 = const()[name = tensor("op_1720"), val = tensor([1, 1, 1, 128])]; tensor var_1721_cast_fp16 = reshape(shape = var_1720, x = linear_47_cast_fp16)[name = tensor("op_1721_cast_fp16")]; tensor layers_9_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_9_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142615808)))]; tensor linear_48_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_9_cross_attn_kv_proj_weight_to_fp16, x = memory_19_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_1725 = const()[name = tensor("op_1725"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_19_cast_fp16 = reshape(shape = var_1725, x = linear_48_cast_fp16)[name = tensor("kv_19_cast_fp16")]; tensor var_1729_begin_0 = const()[name = tensor("op_1729_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_1729_end_0 = const()[name = tensor("op_1729_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_1729_end_mask_0 = const()[name = tensor("op_1729_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1729_squeeze_mask_0 = const()[name = tensor("op_1729_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1729_cast_fp16 = slice_by_index(begin = var_1729_begin_0, end = var_1729_end_0, end_mask = var_1729_end_mask_0, squeeze_mask = var_1729_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor("op_1729_cast_fp16")]; tensor var_1733_begin_0 = const()[name = tensor("op_1733_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_1733_end_0 = const()[name = tensor("op_1733_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_1733_end_mask_0 = const()[name = tensor("op_1733_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1733_squeeze_mask_0 = const()[name = tensor("op_1733_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1733_cast_fp16 = slice_by_index(begin = var_1733_begin_0, end = var_1733_end_0, end_mask = var_1733_end_mask_0, squeeze_mask = var_1733_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor("op_1733_cast_fp16")]; tensor v_39_perm_0 = const()[name = tensor("v_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1736_transpose_x_0 = const()[name = tensor("op_1736_transpose_x_0"), val = tensor(false)]; tensor var_1736_transpose_y_0 = const()[name = tensor("op_1736_transpose_y_0"), val = tensor(false)]; tensor transpose_110_perm_0 = const()[name = tensor("transpose_110_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_111_perm_0 = const()[name = tensor("transpose_111_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = var_1729_cast_fp16)[name = tensor("transpose_143")]; tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = var_1721_cast_fp16)[name = tensor("transpose_144")]; tensor var_1736_cast_fp16 = matmul(transpose_x = var_1736_transpose_x_0, transpose_y = var_1736_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor("op_1736_cast_fp16")]; tensor var_1737_to_fp16 = const()[name = tensor("op_1737_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_77_cast_fp16 = mul(x = var_1736_cast_fp16, y = var_1737_to_fp16)[name = tensor("attn_77_cast_fp16")]; tensor input_191_cast_fp16 = add(x = attn_77_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_191_cast_fp16")]; tensor attn_79_cast_fp16 = softmax(axis = var_1624, x = input_191_cast_fp16)[name = tensor("attn_79_cast_fp16")]; tensor out_39_transpose_x_0 = const()[name = tensor("out_39_transpose_x_0"), val = tensor(false)]; tensor out_39_transpose_y_0 = const()[name = tensor("out_39_transpose_y_0"), val = tensor(false)]; tensor v_39_cast_fp16 = transpose(perm = v_39_perm_0, x = var_1733_cast_fp16)[name = tensor("transpose_145")]; tensor out_39_cast_fp16 = matmul(transpose_x = out_39_transpose_x_0, transpose_y = out_39_transpose_y_0, x = attn_79_cast_fp16, y = v_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; tensor var_1748_perm_0 = const()[name = tensor("op_1748_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1749 = const()[name = tensor("op_1749"), val = tensor([1, 1, -1])]; tensor var_1748_cast_fp16 = transpose(perm = var_1748_perm_0, x = out_39_cast_fp16)[name = tensor("transpose_142")]; tensor input_193_cast_fp16 = reshape(shape = var_1749, x = var_1748_cast_fp16)[name = tensor("input_193_cast_fp16")]; tensor layers_9_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143009088)))]; tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_cross_attn_o_proj_weight_to_fp16, x = input_193_cast_fp16)[name = tensor("linear_49_cast_fp16")]; tensor input_195_cast_fp16 = add(x = input_189_cast_fp16, y = linear_49_cast_fp16)[name = tensor("input_195_cast_fp16")]; tensor x_95_axes_0 = const()[name = tensor("x_95_axes_0"), val = tensor([-1])]; tensor layers_9_norm_ff_weight_to_fp16 = const()[name = tensor("layers_9_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143205760)))]; tensor x_95_cast_fp16 = layer_norm(axes = x_95_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_ff_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("x_95_cast_fp16")]; tensor input_197_perm_0 = const()[name = tensor("input_197_perm_0"), val = tensor([0, 2, 1])]; tensor input_199_pad_type_0 = const()[name = tensor("input_199_pad_type_0"), val = tensor("valid")]; tensor input_199_strides_0 = const()[name = tensor("input_199_strides_0"), val = tensor([1])]; tensor input_199_pad_0 = const()[name = tensor("input_199_pad_0"), val = tensor([0, 0])]; tensor input_199_dilations_0 = const()[name = tensor("input_199_dilations_0"), val = tensor([1])]; tensor input_199_groups_0 = const()[name = tensor("input_199_groups_0"), val = tensor(1)]; tensor layers_9_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_9_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143207360)))]; tensor input_197_cast_fp16 = transpose(perm = input_197_perm_0, x = x_95_cast_fp16)[name = tensor("transpose_141")]; tensor input_199_cast_fp16 = conv(dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = layers_9_ffn_conv1_weight_to_fp16, x = input_197_cast_fp16)[name = tensor("input_199_cast_fp16")]; tensor input_201_mode_0 = const()[name = tensor("input_201_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor("input_201_cast_fp16")]; tensor x_97_pad_type_0 = const()[name = tensor("x_97_pad_type_0"), val = tensor("valid")]; tensor x_97_strides_0 = const()[name = tensor("x_97_strides_0"), val = tensor([1])]; tensor x_97_pad_0 = const()[name = tensor("x_97_pad_0"), val = tensor([0, 0])]; tensor x_97_dilations_0 = const()[name = tensor("x_97_dilations_0"), val = tensor([1])]; tensor x_97_groups_0 = const()[name = tensor("x_97_groups_0"), val = tensor(1)]; tensor layers_9_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_9_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147926016)))]; tensor x_97_cast_fp16 = conv(dilations = x_97_dilations_0, groups = x_97_groups_0, pad = x_97_pad_0, pad_type = x_97_pad_type_0, strides = x_97_strides_0, weight = layers_9_ffn_conv2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor("x_97_cast_fp16")]; tensor x_99_perm_0 = const()[name = tensor("x_99_perm_0"), val = tensor([0, 2, 1])]; tensor x_99_cast_fp16 = transpose(perm = x_99_perm_0, x = x_97_cast_fp16)[name = tensor("transpose_140")]; tensor input_203_cast_fp16 = add(x = input_195_cast_fp16, y = x_99_cast_fp16)[name = tensor("input_203_cast_fp16")]; tensor var_1794 = const()[name = tensor("op_1794"), val = tensor(-1)]; tensor x_101_axes_0 = const()[name = tensor("x_101_axes_0"), val = tensor([-1])]; tensor layers_10_norm_sa_weight_to_fp16 = const()[name = tensor("layers_10_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152644672)))]; tensor var_1797_to_fp16 = const()[name = tensor("op_1797_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_101_cast_fp16 = layer_norm(axes = x_101_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_sa_weight_to_fp16, x = input_203_cast_fp16)[name = tensor("x_101_cast_fp16")]; tensor layers_10_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152646272)))]; tensor linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_qkv_proj_weight_to_fp16, x = x_101_cast_fp16)[name = tensor("linear_50_cast_fp16")]; tensor var_1816 = const()[name = tensor("op_1816"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_43_cast_fp16 = reshape(shape = var_1816, x = linear_50_cast_fp16)[name = tensor("qkv_43_cast_fp16")]; tensor q_41_begin_0 = const()[name = tensor("q_41_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_41_end_0 = const()[name = tensor("q_41_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_41_end_mask_0 = const()[name = tensor("q_41_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_41_squeeze_mask_0 = const()[name = tensor("q_41_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_41_cast_fp16 = slice_by_index(begin = q_41_begin_0, end = q_41_end_0, end_mask = q_41_end_mask_0, squeeze_mask = q_41_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("q_41_cast_fp16")]; tensor k_41_begin_0 = const()[name = tensor("k_41_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_41_end_0 = const()[name = tensor("k_41_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_41_end_mask_0 = const()[name = tensor("k_41_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_41_squeeze_mask_0 = const()[name = tensor("k_41_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_41_cast_fp16 = slice_by_index(begin = k_41_begin_0, end = k_41_end_0, end_mask = k_41_end_mask_0, squeeze_mask = k_41_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("k_41_cast_fp16")]; tensor v_41_begin_0 = const()[name = tensor("v_41_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_41_end_0 = const()[name = tensor("v_41_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_41_end_mask_0 = const()[name = tensor("v_41_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_41_squeeze_mask_0 = const()[name = tensor("v_41_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_41_cast_fp16 = slice_by_index(begin = v_41_begin_0, end = v_41_end_0, end_mask = v_41_end_mask_0, squeeze_mask = v_41_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("v_41_cast_fp16")]; tensor var_1828_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position10)[name = tensor("op_1828_cast_fp16")]; tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 512, 1, 1])]; tensor var_1829_to_fp16_dtype_0 = const()[name = tensor("op_1829_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1828_cast_fp16_to_fp16 = cast(dtype = var_1829_to_fp16_dtype_0, x = var_1828_cast_fp16)[name = tensor("cast_165")]; tensor mask_21_cast_fp16 = reshape(shape = var_1830, x = var_1828_cast_fp16_to_fp16)[name = tensor("mask_21_cast_fp16")]; tensor k_new_21_reps_0 = const()[name = tensor("k_new_21_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_21_cast_fp16 = tile(reps = k_new_21_reps_0, x = k_41_cast_fp16)[name = tensor("k_new_21_cast_fp16")]; tensor v_new_21_reps_0 = const()[name = tensor("v_new_21_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_21_cast_fp16 = tile(reps = v_new_21_reps_0, x = v_41_cast_fp16)[name = tensor("v_new_21_cast_fp16")]; tensor var_1792_to_fp16 = const()[name = tensor("op_1792_to_fp16"), val = tensor(0x1p+0)]; tensor var_1836_cast_fp16 = sub(x = var_1792_to_fp16, y = mask_21_cast_fp16)[name = tensor("op_1836_cast_fp16")]; tensor var_1837_cast_fp16 = mul(x = cache_k10, y = var_1836_cast_fp16)[name = tensor("op_1837_cast_fp16")]; tensor var_1838_cast_fp16 = mul(x = k_new_21_cast_fp16, y = mask_21_cast_fp16)[name = tensor("op_1838_cast_fp16")]; tensor new_k_21 = add(x = var_1837_cast_fp16, y = var_1838_cast_fp16)[name = tensor("new_k_21_cast_fp16")]; tensor var_1841_cast_fp16 = mul(x = cache_v10, y = var_1836_cast_fp16)[name = tensor("op_1841_cast_fp16")]; tensor var_1842_cast_fp16 = mul(x = v_new_21_cast_fp16, y = mask_21_cast_fp16)[name = tensor("op_1842_cast_fp16")]; tensor new_v_21 = add(x = var_1841_cast_fp16, y = var_1842_cast_fp16)[name = tensor("new_v_21_cast_fp16")]; tensor var_1844_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position10)[name = tensor("op_1844_cast_fp16")]; tensor var_1846 = const()[name = tensor("op_1846"), val = tensor([1, 1, 1, 512])]; tensor var_1845_to_fp16_dtype_0 = const()[name = tensor("op_1845_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1844_cast_fp16_to_fp16 = cast(dtype = var_1845_to_fp16_dtype_0, x = var_1844_cast_fp16)[name = tensor("cast_164")]; tensor var_1847_cast_fp16 = reshape(shape = var_1846, x = var_1844_cast_fp16_to_fp16)[name = tensor("op_1847_cast_fp16")]; tensor var_1851 = const()[name = tensor("op_1851"), val = tensor([0, 2, 1, 3])]; tensor var_1854_transpose_x_0 = const()[name = tensor("op_1854_transpose_x_0"), val = tensor(false)]; tensor var_1854_transpose_y_0 = const()[name = tensor("op_1854_transpose_y_0"), val = tensor(false)]; tensor transpose_112_perm_0 = const()[name = tensor("transpose_112_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_113_perm_0 = const()[name = tensor("transpose_113_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = new_k_21)[name = tensor("transpose_137")]; tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = q_41_cast_fp16)[name = tensor("transpose_138")]; tensor var_1854_cast_fp16 = matmul(transpose_x = var_1854_transpose_x_0, transpose_y = var_1854_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor("op_1854_cast_fp16")]; tensor var_1855_to_fp16 = const()[name = tensor("op_1855_to_fp16"), val = tensor(0x1p-3)]; tensor attn_81_cast_fp16 = mul(x = var_1854_cast_fp16, y = var_1855_to_fp16)[name = tensor("attn_81_cast_fp16")]; tensor var_1857_cast_fp16 = sub(x = var_1792_to_fp16, y = var_1847_cast_fp16)[name = tensor("op_1857_cast_fp16")]; tensor var_1858_to_fp16 = const()[name = tensor("op_1858_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_1859_cast_fp16 = mul(x = var_1857_cast_fp16, y = var_1858_to_fp16)[name = tensor("op_1859_cast_fp16")]; tensor input_205_cast_fp16 = add(x = attn_81_cast_fp16, y = var_1859_cast_fp16)[name = tensor("input_205_cast_fp16")]; tensor attn_83_cast_fp16 = softmax(axis = var_1794, x = input_205_cast_fp16)[name = tensor("attn_83_cast_fp16")]; tensor out_41_transpose_x_0 = const()[name = tensor("out_41_transpose_x_0"), val = tensor(false)]; tensor out_41_transpose_y_0 = const()[name = tensor("out_41_transpose_y_0"), val = tensor(false)]; tensor v4_21_cast_fp16 = transpose(perm = var_1851, x = new_v_21)[name = tensor("transpose_139")]; tensor out_41_cast_fp16 = matmul(transpose_x = out_41_transpose_x_0, transpose_y = out_41_transpose_y_0, x = attn_83_cast_fp16, y = v4_21_cast_fp16)[name = tensor("out_41_cast_fp16")]; tensor var_1863_perm_0 = const()[name = tensor("op_1863_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1864 = const()[name = tensor("op_1864"), val = tensor([1, 1, -1])]; tensor var_1863_cast_fp16 = transpose(perm = var_1863_perm_0, x = out_41_cast_fp16)[name = tensor("transpose_136")]; tensor input_207_cast_fp16 = reshape(shape = var_1864, x = var_1863_cast_fp16)[name = tensor("input_207_cast_fp16")]; tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156185280)))]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_207_cast_fp16)[name = tensor("linear_51_cast_fp16")]; tensor var_1868_to_fp16 = const()[name = tensor("op_1868_to_fp16"), val = tensor(0x1p+0)]; tensor var_1869 = add(x = position10, y = var_1868_to_fp16)[name = tensor("op_1869_cast_fp16")]; tensor input_209_cast_fp16 = add(x = input_203_cast_fp16, y = linear_51_cast_fp16)[name = tensor("input_209_cast_fp16")]; tensor x_103_axes_0 = const()[name = tensor("x_103_axes_0"), val = tensor([-1])]; tensor layers_10_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_10_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157364992)))]; tensor x_103_cast_fp16 = layer_norm(axes = x_103_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_xa_query_weight_to_fp16, x = input_209_cast_fp16)[name = tensor("x_103_cast_fp16")]; tensor memory_21_axes_0 = const()[name = tensor("memory_21_axes_0"), val = tensor([-1])]; tensor layers_10_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_10_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157366592)))]; tensor memory_21_cast_fp16 = layer_norm(axes = memory_21_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_21_cast_fp16")]; tensor layers_10_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157368192)))]; tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_10_cross_attn_q_proj_weight_to_fp16, x = x_103_cast_fp16)[name = tensor("linear_52_cast_fp16")]; tensor var_1890 = const()[name = tensor("op_1890"), val = tensor([1, 1, 1, 128])]; tensor var_1891_cast_fp16 = reshape(shape = var_1890, x = linear_52_cast_fp16)[name = tensor("op_1891_cast_fp16")]; tensor layers_10_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_10_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157564864)))]; tensor linear_53_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_10_cross_attn_kv_proj_weight_to_fp16, x = memory_21_cast_fp16)[name = tensor("linear_53_cast_fp16")]; tensor var_1895 = const()[name = tensor("op_1895"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_21_cast_fp16 = reshape(shape = var_1895, x = linear_53_cast_fp16)[name = tensor("kv_21_cast_fp16")]; tensor var_1899_begin_0 = const()[name = tensor("op_1899_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_1899_end_0 = const()[name = tensor("op_1899_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_1899_end_mask_0 = const()[name = tensor("op_1899_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1899_squeeze_mask_0 = const()[name = tensor("op_1899_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1899_cast_fp16 = slice_by_index(begin = var_1899_begin_0, end = var_1899_end_0, end_mask = var_1899_end_mask_0, squeeze_mask = var_1899_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor("op_1899_cast_fp16")]; tensor var_1903_begin_0 = const()[name = tensor("op_1903_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_1903_end_0 = const()[name = tensor("op_1903_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_1903_end_mask_0 = const()[name = tensor("op_1903_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_1903_squeeze_mask_0 = const()[name = tensor("op_1903_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_1903_cast_fp16 = slice_by_index(begin = var_1903_begin_0, end = var_1903_end_0, end_mask = var_1903_end_mask_0, squeeze_mask = var_1903_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor("op_1903_cast_fp16")]; tensor v_43_perm_0 = const()[name = tensor("v_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1906_transpose_x_0 = const()[name = tensor("op_1906_transpose_x_0"), val = tensor(false)]; tensor var_1906_transpose_y_0 = const()[name = tensor("op_1906_transpose_y_0"), val = tensor(false)]; tensor transpose_114_perm_0 = const()[name = tensor("transpose_114_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_115_perm_0 = const()[name = tensor("transpose_115_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = var_1899_cast_fp16)[name = tensor("transpose_133")]; tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = var_1891_cast_fp16)[name = tensor("transpose_134")]; tensor var_1906_cast_fp16 = matmul(transpose_x = var_1906_transpose_x_0, transpose_y = var_1906_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor("op_1906_cast_fp16")]; tensor var_1907_to_fp16 = const()[name = tensor("op_1907_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_85_cast_fp16 = mul(x = var_1906_cast_fp16, y = var_1907_to_fp16)[name = tensor("attn_85_cast_fp16")]; tensor input_211_cast_fp16 = add(x = attn_85_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_211_cast_fp16")]; tensor attn_87_cast_fp16 = softmax(axis = var_1794, x = input_211_cast_fp16)[name = tensor("attn_87_cast_fp16")]; tensor out_43_transpose_x_0 = const()[name = tensor("out_43_transpose_x_0"), val = tensor(false)]; tensor out_43_transpose_y_0 = const()[name = tensor("out_43_transpose_y_0"), val = tensor(false)]; tensor v_43_cast_fp16 = transpose(perm = v_43_perm_0, x = var_1903_cast_fp16)[name = tensor("transpose_135")]; tensor out_43_cast_fp16 = matmul(transpose_x = out_43_transpose_x_0, transpose_y = out_43_transpose_y_0, x = attn_87_cast_fp16, y = v_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; tensor var_1918_perm_0 = const()[name = tensor("op_1918_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1919 = const()[name = tensor("op_1919"), val = tensor([1, 1, -1])]; tensor var_1918_cast_fp16 = transpose(perm = var_1918_perm_0, x = out_43_cast_fp16)[name = tensor("transpose_132")]; tensor input_213_cast_fp16 = reshape(shape = var_1919, x = var_1918_cast_fp16)[name = tensor("input_213_cast_fp16")]; tensor layers_10_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157958144)))]; tensor linear_54_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_cross_attn_o_proj_weight_to_fp16, x = input_213_cast_fp16)[name = tensor("linear_54_cast_fp16")]; tensor input_215_cast_fp16 = add(x = input_209_cast_fp16, y = linear_54_cast_fp16)[name = tensor("input_215_cast_fp16")]; tensor x_105_axes_0 = const()[name = tensor("x_105_axes_0"), val = tensor([-1])]; tensor layers_10_norm_ff_weight_to_fp16 = const()[name = tensor("layers_10_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158154816)))]; tensor x_105_cast_fp16 = layer_norm(axes = x_105_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_ff_weight_to_fp16, x = input_215_cast_fp16)[name = tensor("x_105_cast_fp16")]; tensor input_217_perm_0 = const()[name = tensor("input_217_perm_0"), val = tensor([0, 2, 1])]; tensor input_219_pad_type_0 = const()[name = tensor("input_219_pad_type_0"), val = tensor("valid")]; tensor input_219_strides_0 = const()[name = tensor("input_219_strides_0"), val = tensor([1])]; tensor input_219_pad_0 = const()[name = tensor("input_219_pad_0"), val = tensor([0, 0])]; tensor input_219_dilations_0 = const()[name = tensor("input_219_dilations_0"), val = tensor([1])]; tensor input_219_groups_0 = const()[name = tensor("input_219_groups_0"), val = tensor(1)]; tensor layers_10_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_10_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158156416)))]; tensor input_217_cast_fp16 = transpose(perm = input_217_perm_0, x = x_105_cast_fp16)[name = tensor("transpose_131")]; tensor input_219_cast_fp16 = conv(dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = layers_10_ffn_conv1_weight_to_fp16, x = input_217_cast_fp16)[name = tensor("input_219_cast_fp16")]; tensor input_221_mode_0 = const()[name = tensor("input_221_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor("input_221_cast_fp16")]; tensor x_107_pad_type_0 = const()[name = tensor("x_107_pad_type_0"), val = tensor("valid")]; tensor x_107_strides_0 = const()[name = tensor("x_107_strides_0"), val = tensor([1])]; tensor x_107_pad_0 = const()[name = tensor("x_107_pad_0"), val = tensor([0, 0])]; tensor x_107_dilations_0 = const()[name = tensor("x_107_dilations_0"), val = tensor([1])]; tensor x_107_groups_0 = const()[name = tensor("x_107_groups_0"), val = tensor(1)]; tensor layers_10_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_10_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162875072)))]; tensor x_107_cast_fp16 = conv(dilations = x_107_dilations_0, groups = x_107_groups_0, pad = x_107_pad_0, pad_type = x_107_pad_type_0, strides = x_107_strides_0, weight = layers_10_ffn_conv2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor("x_107_cast_fp16")]; tensor x_109_perm_0 = const()[name = tensor("x_109_perm_0"), val = tensor([0, 2, 1])]; tensor x_109_cast_fp16 = transpose(perm = x_109_perm_0, x = x_107_cast_fp16)[name = tensor("transpose_130")]; tensor input_223_cast_fp16 = add(x = input_215_cast_fp16, y = x_109_cast_fp16)[name = tensor("input_223_cast_fp16")]; tensor var_1964 = const()[name = tensor("op_1964"), val = tensor(-1)]; tensor x_111_axes_0 = const()[name = tensor("x_111_axes_0"), val = tensor([-1])]; tensor layers_11_norm_sa_weight_to_fp16 = const()[name = tensor("layers_11_norm_sa_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167593728)))]; tensor var_1967_to_fp16 = const()[name = tensor("op_1967_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_111_cast_fp16 = layer_norm(axes = x_111_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_sa_weight_to_fp16, x = input_223_cast_fp16)[name = tensor("x_111_cast_fp16")]; tensor layers_11_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_qkv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167595328)))]; tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_qkv_proj_weight_to_fp16, x = x_111_cast_fp16)[name = tensor("linear_55_cast_fp16")]; tensor var_1986 = const()[name = tensor("op_1986"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_cast_fp16 = reshape(shape = var_1986, x = linear_55_cast_fp16)[name = tensor("qkv_cast_fp16")]; tensor q_45_begin_0 = const()[name = tensor("q_45_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_45_end_0 = const()[name = tensor("q_45_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_45_end_mask_0 = const()[name = tensor("q_45_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_45_squeeze_mask_0 = const()[name = tensor("q_45_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_45_cast_fp16 = slice_by_index(begin = q_45_begin_0, end = q_45_end_0, end_mask = q_45_end_mask_0, squeeze_mask = q_45_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("q_45_cast_fp16")]; tensor k_45_begin_0 = const()[name = tensor("k_45_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor k_45_end_0 = const()[name = tensor("k_45_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor k_45_end_mask_0 = const()[name = tensor("k_45_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_45_squeeze_mask_0 = const()[name = tensor("k_45_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_45_cast_fp16 = slice_by_index(begin = k_45_begin_0, end = k_45_end_0, end_mask = k_45_end_mask_0, squeeze_mask = k_45_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("k_45_cast_fp16")]; tensor v_45_begin_0 = const()[name = tensor("v_45_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor v_45_end_0 = const()[name = tensor("v_45_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor v_45_end_mask_0 = const()[name = tensor("v_45_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_45_squeeze_mask_0 = const()[name = tensor("v_45_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_45_cast_fp16 = slice_by_index(begin = v_45_begin_0, end = v_45_end_0, end_mask = v_45_end_mask_0, squeeze_mask = v_45_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("v_45_cast_fp16")]; tensor var_1998_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position11)[name = tensor("op_1998_cast_fp16")]; tensor var_2000 = const()[name = tensor("op_2000"), val = tensor([1, 512, 1, 1])]; tensor var_1999_to_fp16_dtype_0 = const()[name = tensor("op_1999_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1998_cast_fp16_to_fp16 = cast(dtype = var_1999_to_fp16_dtype_0, x = var_1998_cast_fp16)[name = tensor("cast_163")]; tensor mask_cast_fp16 = reshape(shape = var_2000, x = var_1998_cast_fp16_to_fp16)[name = tensor("mask_cast_fp16")]; tensor k_new_reps_0 = const()[name = tensor("k_new_reps_0"), val = tensor([1, 512, 1, 1])]; tensor k_new_cast_fp16 = tile(reps = k_new_reps_0, x = k_45_cast_fp16)[name = tensor("k_new_cast_fp16")]; tensor v_new_reps_0 = const()[name = tensor("v_new_reps_0"), val = tensor([1, 512, 1, 1])]; tensor v_new_cast_fp16 = tile(reps = v_new_reps_0, x = v_45_cast_fp16)[name = tensor("v_new_cast_fp16")]; tensor var_1962_to_fp16 = const()[name = tensor("op_1962_to_fp16"), val = tensor(0x1p+0)]; tensor var_2006_cast_fp16 = sub(x = var_1962_to_fp16, y = mask_cast_fp16)[name = tensor("op_2006_cast_fp16")]; tensor var_2007_cast_fp16 = mul(x = cache_k11, y = var_2006_cast_fp16)[name = tensor("op_2007_cast_fp16")]; tensor var_2008_cast_fp16 = mul(x = k_new_cast_fp16, y = mask_cast_fp16)[name = tensor("op_2008_cast_fp16")]; tensor new_k = add(x = var_2007_cast_fp16, y = var_2008_cast_fp16)[name = tensor("new_k_cast_fp16")]; tensor var_2011_cast_fp16 = mul(x = cache_v11, y = var_2006_cast_fp16)[name = tensor("op_2011_cast_fp16")]; tensor var_2012_cast_fp16 = mul(x = v_new_cast_fp16, y = mask_cast_fp16)[name = tensor("op_2012_cast_fp16")]; tensor new_v = add(x = var_2011_cast_fp16, y = var_2012_cast_fp16)[name = tensor("new_v_cast_fp16")]; tensor var_2014_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position11)[name = tensor("op_2014_cast_fp16")]; tensor var_2016 = const()[name = tensor("op_2016"), val = tensor([1, 1, 1, 512])]; tensor var_2015_to_fp16_dtype_0 = const()[name = tensor("op_2015_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_2014_cast_fp16_to_fp16 = cast(dtype = var_2015_to_fp16_dtype_0, x = var_2014_cast_fp16)[name = tensor("cast_162")]; tensor var_2017_cast_fp16 = reshape(shape = var_2016, x = var_2014_cast_fp16_to_fp16)[name = tensor("op_2017_cast_fp16")]; tensor var_2021 = const()[name = tensor("op_2021"), val = tensor([0, 2, 1, 3])]; tensor var_2024_transpose_x_0 = const()[name = tensor("op_2024_transpose_x_0"), val = tensor(false)]; tensor var_2024_transpose_y_0 = const()[name = tensor("op_2024_transpose_y_0"), val = tensor(false)]; tensor transpose_116_perm_0 = const()[name = tensor("transpose_116_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_117_perm_0 = const()[name = tensor("transpose_117_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = new_k)[name = tensor("transpose_127")]; tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = q_45_cast_fp16)[name = tensor("transpose_128")]; tensor var_2024_cast_fp16 = matmul(transpose_x = var_2024_transpose_x_0, transpose_y = var_2024_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor("op_2024_cast_fp16")]; tensor var_2025_to_fp16 = const()[name = tensor("op_2025_to_fp16"), val = tensor(0x1p-3)]; tensor attn_89_cast_fp16 = mul(x = var_2024_cast_fp16, y = var_2025_to_fp16)[name = tensor("attn_89_cast_fp16")]; tensor var_2027_cast_fp16 = sub(x = var_1962_to_fp16, y = var_2017_cast_fp16)[name = tensor("op_2027_cast_fp16")]; tensor var_2028_to_fp16 = const()[name = tensor("op_2028_to_fp16"), val = tensor(-0x1.d4cp+14)]; tensor var_2029_cast_fp16 = mul(x = var_2027_cast_fp16, y = var_2028_to_fp16)[name = tensor("op_2029_cast_fp16")]; tensor input_225_cast_fp16 = add(x = attn_89_cast_fp16, y = var_2029_cast_fp16)[name = tensor("input_225_cast_fp16")]; tensor attn_91_cast_fp16 = softmax(axis = var_1964, x = input_225_cast_fp16)[name = tensor("attn_91_cast_fp16")]; tensor out_45_transpose_x_0 = const()[name = tensor("out_45_transpose_x_0"), val = tensor(false)]; tensor out_45_transpose_y_0 = const()[name = tensor("out_45_transpose_y_0"), val = tensor(false)]; tensor v4_cast_fp16 = transpose(perm = var_2021, x = new_v)[name = tensor("transpose_129")]; tensor out_45_cast_fp16 = matmul(transpose_x = out_45_transpose_x_0, transpose_y = out_45_transpose_y_0, x = attn_91_cast_fp16, y = v4_cast_fp16)[name = tensor("out_45_cast_fp16")]; tensor var_2033_perm_0 = const()[name = tensor("op_2033_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2034 = const()[name = tensor("op_2034"), val = tensor([1, 1, -1])]; tensor var_2033_cast_fp16 = transpose(perm = var_2033_perm_0, x = out_45_cast_fp16)[name = tensor("transpose_126")]; tensor input_227_cast_fp16 = reshape(shape = var_2034, x = var_2033_cast_fp16)[name = tensor("input_227_cast_fp16")]; tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171134336)))]; tensor linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_227_cast_fp16)[name = tensor("linear_56_cast_fp16")]; tensor var_2038_to_fp16 = const()[name = tensor("op_2038_to_fp16"), val = tensor(0x1p+0)]; tensor var_2039 = add(x = position11, y = var_2038_to_fp16)[name = tensor("op_2039_cast_fp16")]; tensor input_229_cast_fp16 = add(x = input_223_cast_fp16, y = linear_56_cast_fp16)[name = tensor("input_229_cast_fp16")]; tensor x_113_axes_0 = const()[name = tensor("x_113_axes_0"), val = tensor([-1])]; tensor layers_11_norm_xa_query_weight_to_fp16 = const()[name = tensor("layers_11_norm_xa_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172314048)))]; tensor x_113_cast_fp16 = layer_norm(axes = x_113_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_xa_query_weight_to_fp16, x = input_229_cast_fp16)[name = tensor("x_113_cast_fp16")]; tensor memory_axes_0 = const()[name = tensor("memory_axes_0"), val = tensor([-1])]; tensor layers_11_norm_xa_memory_weight_to_fp16 = const()[name = tensor("layers_11_norm_xa_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172315648)))]; tensor memory_cast_fp16 = layer_norm(axes = memory_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor("memory_cast_fp16")]; tensor layers_11_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_cross_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172317248)))]; tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_11_cross_attn_q_proj_weight_to_fp16, x = x_113_cast_fp16)[name = tensor("linear_57_cast_fp16")]; tensor var_2060 = const()[name = tensor("op_2060"), val = tensor([1, 1, 1, 128])]; tensor var_2061_cast_fp16 = reshape(shape = var_2060, x = linear_57_cast_fp16)[name = tensor("op_2061_cast_fp16")]; tensor layers_11_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor("layers_11_cross_attn_kv_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172513920)))]; tensor linear_58_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_11_cross_attn_kv_proj_weight_to_fp16, x = memory_cast_fp16)[name = tensor("linear_58_cast_fp16")]; tensor var_2065 = const()[name = tensor("op_2065"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_cast_fp16 = reshape(shape = var_2065, x = linear_58_cast_fp16)[name = tensor("kv_cast_fp16")]; tensor var_2069_begin_0 = const()[name = tensor("op_2069_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_2069_end_0 = const()[name = tensor("op_2069_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor var_2069_end_mask_0 = const()[name = tensor("op_2069_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_2069_squeeze_mask_0 = const()[name = tensor("op_2069_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_2069_cast_fp16 = slice_by_index(begin = var_2069_begin_0, end = var_2069_end_0, end_mask = var_2069_end_mask_0, squeeze_mask = var_2069_squeeze_mask_0, x = kv_cast_fp16)[name = tensor("op_2069_cast_fp16")]; tensor var_2073_begin_0 = const()[name = tensor("op_2073_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor var_2073_end_0 = const()[name = tensor("op_2073_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor var_2073_end_mask_0 = const()[name = tensor("op_2073_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor var_2073_squeeze_mask_0 = const()[name = tensor("op_2073_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor var_2073_cast_fp16 = slice_by_index(begin = var_2073_begin_0, end = var_2073_end_0, end_mask = var_2073_end_mask_0, squeeze_mask = var_2073_squeeze_mask_0, x = kv_cast_fp16)[name = tensor("op_2073_cast_fp16")]; tensor v_perm_0 = const()[name = tensor("v_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2076_transpose_x_0 = const()[name = tensor("op_2076_transpose_x_0"), val = tensor(false)]; tensor var_2076_transpose_y_0 = const()[name = tensor("op_2076_transpose_y_0"), val = tensor(false)]; tensor transpose_118_perm_0 = const()[name = tensor("transpose_118_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_119_perm_0 = const()[name = tensor("transpose_119_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = var_2069_cast_fp16)[name = tensor("transpose_123")]; tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = var_2061_cast_fp16)[name = tensor("transpose_124")]; tensor var_2076_cast_fp16 = matmul(transpose_x = var_2076_transpose_x_0, transpose_y = var_2076_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor("op_2076_cast_fp16")]; tensor var_2077_to_fp16 = const()[name = tensor("op_2077_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_93_cast_fp16 = mul(x = var_2076_cast_fp16, y = var_2077_to_fp16)[name = tensor("attn_93_cast_fp16")]; tensor input_231_cast_fp16 = add(x = attn_93_cast_fp16, y = var_214_cast_fp16)[name = tensor("input_231_cast_fp16")]; tensor attn_cast_fp16 = softmax(axis = var_1964, x = input_231_cast_fp16)[name = tensor("attn_cast_fp16")]; tensor out_transpose_x_0 = const()[name = tensor("out_transpose_x_0"), val = tensor(false)]; tensor out_transpose_y_0 = const()[name = tensor("out_transpose_y_0"), val = tensor(false)]; tensor v_cast_fp16 = transpose(perm = v_perm_0, x = var_2073_cast_fp16)[name = tensor("transpose_125")]; tensor out_cast_fp16 = matmul(transpose_x = out_transpose_x_0, transpose_y = out_transpose_y_0, x = attn_cast_fp16, y = v_cast_fp16)[name = tensor("out_cast_fp16")]; tensor var_2088_perm_0 = const()[name = tensor("op_2088_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2089 = const()[name = tensor("op_2089"), val = tensor([1, 1, -1])]; tensor var_2088_cast_fp16 = transpose(perm = var_2088_perm_0, x = out_cast_fp16)[name = tensor("transpose_122")]; tensor input_233_cast_fp16 = reshape(shape = var_2089, x = var_2088_cast_fp16)[name = tensor("input_233_cast_fp16")]; tensor layers_11_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_cross_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172907200)))]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_cross_attn_o_proj_weight_to_fp16, x = input_233_cast_fp16)[name = tensor("linear_59_cast_fp16")]; tensor input_235_cast_fp16 = add(x = input_229_cast_fp16, y = linear_59_cast_fp16)[name = tensor("input_235_cast_fp16")]; tensor x_115_axes_0 = const()[name = tensor("x_115_axes_0"), val = tensor([-1])]; tensor layers_11_norm_ff_weight_to_fp16 = const()[name = tensor("layers_11_norm_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173103872)))]; tensor x_115_cast_fp16 = layer_norm(axes = x_115_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_ff_weight_to_fp16, x = input_235_cast_fp16)[name = tensor("x_115_cast_fp16")]; tensor input_237_perm_0 = const()[name = tensor("input_237_perm_0"), val = tensor([0, 2, 1])]; tensor input_239_pad_type_0 = const()[name = tensor("input_239_pad_type_0"), val = tensor("valid")]; tensor input_239_strides_0 = const()[name = tensor("input_239_strides_0"), val = tensor([1])]; tensor input_239_pad_0 = const()[name = tensor("input_239_pad_0"), val = tensor([0, 0])]; tensor input_239_dilations_0 = const()[name = tensor("input_239_dilations_0"), val = tensor([1])]; tensor input_239_groups_0 = const()[name = tensor("input_239_groups_0"), val = tensor(1)]; tensor layers_11_ffn_conv1_weight_to_fp16 = const()[name = tensor("layers_11_ffn_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173105472)))]; tensor input_237_cast_fp16 = transpose(perm = input_237_perm_0, x = x_115_cast_fp16)[name = tensor("transpose_121")]; tensor input_239_cast_fp16 = conv(dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = layers_11_ffn_conv1_weight_to_fp16, x = input_237_cast_fp16)[name = tensor("input_239_cast_fp16")]; tensor input_241_mode_0 = const()[name = tensor("input_241_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor input_241_cast_fp16 = gelu(mode = input_241_mode_0, x = input_239_cast_fp16)[name = tensor("input_241_cast_fp16")]; tensor x_117_pad_type_0 = const()[name = tensor("x_117_pad_type_0"), val = tensor("valid")]; tensor x_117_strides_0 = const()[name = tensor("x_117_strides_0"), val = tensor([1])]; tensor x_117_pad_0 = const()[name = tensor("x_117_pad_0"), val = tensor([0, 0])]; tensor x_117_dilations_0 = const()[name = tensor("x_117_dilations_0"), val = tensor([1])]; tensor x_117_groups_0 = const()[name = tensor("x_117_groups_0"), val = tensor(1)]; tensor layers_11_ffn_conv2_weight_to_fp16 = const()[name = tensor("layers_11_ffn_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177824128)))]; tensor x_117_cast_fp16 = conv(dilations = x_117_dilations_0, groups = x_117_groups_0, pad = x_117_pad_0, pad_type = x_117_pad_type_0, strides = x_117_strides_0, weight = layers_11_ffn_conv2_weight_to_fp16, x = input_241_cast_fp16)[name = tensor("x_117_cast_fp16")]; tensor x_perm_0 = const()[name = tensor("x_perm_0"), val = tensor([0, 2, 1])]; tensor x_cast_fp16 = transpose(perm = x_perm_0, x = x_117_cast_fp16)[name = tensor("transpose_120")]; tensor input_243_cast_fp16 = add(x = input_235_cast_fp16, y = x_cast_fp16)[name = tensor("input_243_cast_fp16")]; tensor input_axes_0 = const()[name = tensor("input_axes_0"), val = tensor([-1])]; tensor norm_out_weight_to_fp16 = const()[name = tensor("norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182542784)))]; tensor var_2121_to_fp16 = const()[name = tensor("op_2121_to_fp16"), val = tensor(0x1.5p-17)]; tensor input = layer_norm(axes = input_axes_0, epsilon = var_2121_to_fp16, gamma = norm_out_weight_to_fp16, x = input_243_cast_fp16)[name = tensor("input_cast_fp16")]; tensor final_proj_weight_to_fp16 = const()[name = tensor("final_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182544384)))]; tensor final_proj_bias_to_fp16 = const()[name = tensor("final_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207415360)))]; tensor var_2129 = linear(bias = final_proj_bias_to_fp16, weight = final_proj_weight_to_fp16, x = input)[name = tensor("linear_60_cast_fp16")]; } -> (var_2129, input, new_k_1, new_v_1, var_169, new_k_3, new_v_3, var_339, new_k_5, new_v_5, var_509, new_k_7, new_v_7, var_679, new_k_9, new_v_9, var_849, new_k_11, new_v_11, var_1019, new_k_13, new_v_13, var_1189, new_k_15, new_v_15, var_1359, new_k_17, new_v_17, var_1529, new_k_19, new_v_19, var_1699, new_k_21, new_v_21, var_1869, new_k, new_v, var_2039); }