alexwengg's picture
Upload 97 files
046614c verified
program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
{
func main<ios17>(tensor<fp16, [1, 1, 768]> audio_embed, tensor<fp16, [1, 512, 12, 64]> cache_k0, tensor<fp16, [1, 512, 12, 64]> cache_k1, tensor<fp16, [1, 512, 12, 64]> cache_k10, tensor<fp16, [1, 512, 12, 64]> cache_k11, tensor<fp16, [1, 512, 12, 64]> cache_k2, tensor<fp16, [1, 512, 12, 64]> cache_k3, tensor<fp16, [1, 512, 12, 64]> cache_k4, tensor<fp16, [1, 512, 12, 64]> cache_k5, tensor<fp16, [1, 512, 12, 64]> cache_k6, tensor<fp16, [1, 512, 12, 64]> cache_k7, tensor<fp16, [1, 512, 12, 64]> cache_k8, tensor<fp16, [1, 512, 12, 64]> cache_k9, tensor<fp16, [1, 512, 12, 64]> cache_v0, tensor<fp16, [1, 512, 12, 64]> cache_v1, tensor<fp16, [1, 512, 12, 64]> cache_v10, tensor<fp16, [1, 512, 12, 64]> cache_v11, tensor<fp16, [1, 512, 12, 64]> cache_v2, tensor<fp16, [1, 512, 12, 64]> cache_v3, tensor<fp16, [1, 512, 12, 64]> cache_v4, tensor<fp16, [1, 512, 12, 64]> cache_v5, tensor<fp16, [1, 512, 12, 64]> cache_v6, tensor<fp16, [1, 512, 12, 64]> cache_v7, tensor<fp16, [1, 512, 12, 64]> cache_v8, tensor<fp16, [1, 512, 12, 64]> cache_v9, tensor<fp32, [1, 256]> encoder_mask, tensor<fp16, [1, 256, 768]> encoder_output, tensor<fp16, [1]> position0, tensor<fp16, [1]> position1, tensor<fp16, [1]> position10, tensor<fp16, [1]> position11, tensor<fp16, [1]> position2, tensor<fp16, [1]> position3, tensor<fp16, [1]> position4, tensor<fp16, [1]> position5, tensor<fp16, [1]> position6, tensor<fp16, [1]> position7, tensor<fp16, [1]> position8, tensor<fp16, [1]> position9) {
tensor<string, []> cast_190_dtype_0 = const()[name = tensor<string, []>("cast_190_dtype_0"), val = tensor<string, []>("bool")];
tensor<bool, [1, 256]> cast_190 = cast(dtype = cast_190_dtype_0, x = encoder_mask)[name = tensor<string, []>("cast_190")];
tensor<int32, []> var_75_batch_dims_0 = const()[name = tensor<string, []>("op_75_batch_dims_0"), val = tensor<int32, []>(0)];
tensor<bool, []> var_75_validate_indices_0 = const()[name = tensor<string, []>("op_75_validate_indices_0"), val = tensor<bool, []>(false)];
tensor<fp16, [2048, 768]> position_embeddings_weight_to_fp16 = const()[name = tensor<string, []>("position_embeddings_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
tensor<string, []> position0_to_int32_to_int16_dtype_0 = const()[name = tensor<string, []>("position0_to_int32_to_int16_dtype_0"), val = tensor<string, []>("int16")];
tensor<string, []> cast_161_dtype_0 = const()[name = tensor<string, []>("cast_161_dtype_0"), val = tensor<string, []>("int32")];
tensor<int32, []> greater_equal_0_y_0 = const()[name = tensor<string, []>("greater_equal_0_y_0"), val = tensor<int32, []>(0)];
tensor<int16, [1]> position0_to_int16 = cast(dtype = position0_to_int32_to_int16_dtype_0, x = position0)[name = tensor<string, []>("cast_189")];
tensor<int32, [1]> cast_161 = cast(dtype = cast_161_dtype_0, x = position0_to_int16)[name = tensor<string, []>("cast_188")];
tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_161, y = greater_equal_0_y_0)[name = tensor<string, []>("greater_equal_0")];
tensor<int32, []> slice_by_index_0 = const()[name = tensor<string, []>("slice_by_index_0"), val = tensor<int32, []>(2048)];
tensor<int32, [1]> add_0 = add(x = cast_161, y = slice_by_index_0)[name = tensor<string, []>("add_0")];
tensor<int32, [1]> select_0 = select(a = cast_161, b = add_0, cond = greater_equal_0)[name = tensor<string, []>("select_0")];
tensor<int32, []> var_75_cast_fp16_cast_uint16_axis_0 = const()[name = tensor<string, []>("op_75_cast_fp16_cast_uint16_axis_0"), val = tensor<int32, []>(0)];
tensor<string, []> select_0_to_int16_dtype_0 = const()[name = tensor<string, []>("select_0_to_int16_dtype_0"), val = tensor<string, []>("int16")];
tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor<string, []>("cast_187")];
tensor<fp16, [1, 768]> var_75_cast_fp16_cast_uint16_cast_uint16 = gather(axis = var_75_cast_fp16_cast_uint16_axis_0, batch_dims = var_75_batch_dims_0, indices = select_0_to_int16, validate_indices = var_75_validate_indices_0, x = position_embeddings_weight_to_fp16)[name = tensor<string, []>("op_75_cast_fp16_cast_uint16_cast_uint16")];
tensor<int32, [1]> var_77_axes_0 = const()[name = tensor<string, []>("op_77_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [1, 1, 768]> var_77_cast_fp16 = expand_dims(axes = var_77_axes_0, x = var_75_cast_fp16_cast_uint16_cast_uint16)[name = tensor<string, []>("op_77_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_3_cast_fp16 = add(x = audio_embed, y = var_77_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
tensor<int32, []> var_94 = const()[name = tensor<string, []>("op_94"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_1_axes_0 = const()[name = tensor<string, []>("x_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_0_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3145856)))];
tensor<fp16, []> var_97_to_fp16 = const()[name = tensor<string, []>("op_97_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_sa_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("x_1_cast_fp16")];
tensor<fp16, [2304, 768]> layers_0_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3147456)))];
tensor<fp16, [2304]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6686464)))];
tensor<fp16, [1, 1, 2304]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_qkv_proj_weight_to_fp16, x = x_1_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
tensor<int32, [5]> var_116 = const()[name = tensor<string, []>("op_116"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_3_cast_fp16 = reshape(shape = var_116, x = linear_0_cast_fp16)[name = tensor<string, []>("qkv_3_cast_fp16")];
tensor<int32, [5]> q_1_begin_0 = const()[name = tensor<string, []>("q_1_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_1_end_0 = const()[name = tensor<string, []>("q_1_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_1_end_mask_0 = const()[name = tensor<string, []>("q_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_1_squeeze_mask_0 = const()[name = tensor<string, []>("q_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("q_1_cast_fp16")];
tensor<int32, [5]> k_1_begin_0 = const()[name = tensor<string, []>("k_1_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_1_end_0 = const()[name = tensor<string, []>("k_1_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_1_end_mask_0 = const()[name = tensor<string, []>("k_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_1_squeeze_mask_0 = const()[name = tensor<string, []>("k_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_1_cast_fp16 = slice_by_index(begin = k_1_begin_0, end = k_1_end_0, end_mask = k_1_end_mask_0, squeeze_mask = k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
tensor<int32, [5]> v_1_begin_0 = const()[name = tensor<string, []>("v_1_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_1_end_0 = const()[name = tensor<string, []>("v_1_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_1_end_mask_0 = const()[name = tensor<string, []>("v_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_1_squeeze_mask_0 = const()[name = tensor<string, []>("v_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_1_cast_fp16 = slice_by_index(begin = v_1_begin_0, end = v_1_end_0, end_mask = v_1_end_mask_0, squeeze_mask = v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("v_1_cast_fp16")];
tensor<fp16, [512]> positions_range_1_promoted_to_fp16 = const()[name = tensor<string, []>("positions_range_1_promoted_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6691136)))];
tensor<bool, [512]> var_128_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position0)[name = tensor<string, []>("op_128_cast_fp16")];
tensor<int32, [4]> var_130 = const()[name = tensor<string, []>("op_130"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_129_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_129_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_128_cast_fp16_to_fp16 = cast(dtype = var_129_to_fp16_dtype_0, x = var_128_cast_fp16)[name = tensor<string, []>("cast_186")];
tensor<fp16, [1, 512, 1, 1]> mask_1_cast_fp16 = reshape(shape = var_130, x = var_128_cast_fp16_to_fp16)[name = tensor<string, []>("mask_1_cast_fp16")];
tensor<int32, [4]> k_new_1_reps_0 = const()[name = tensor<string, []>("k_new_1_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_1_cast_fp16 = tile(reps = k_new_1_reps_0, x = k_1_cast_fp16)[name = tensor<string, []>("k_new_1_cast_fp16")];
tensor<int32, [4]> v_new_1_reps_0 = const()[name = tensor<string, []>("v_new_1_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_1_cast_fp16 = tile(reps = v_new_1_reps_0, x = v_1_cast_fp16)[name = tensor<string, []>("v_new_1_cast_fp16")];
tensor<fp16, []> var_92_to_fp16 = const()[name = tensor<string, []>("op_92_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_136_cast_fp16 = sub(x = var_92_to_fp16, y = mask_1_cast_fp16)[name = tensor<string, []>("op_136_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_137_cast_fp16 = mul(x = cache_k0, y = var_136_cast_fp16)[name = tensor<string, []>("op_137_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_138_cast_fp16 = mul(x = k_new_1_cast_fp16, y = mask_1_cast_fp16)[name = tensor<string, []>("op_138_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_1 = add(x = var_137_cast_fp16, y = var_138_cast_fp16)[name = tensor<string, []>("new_k_1_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_141_cast_fp16 = mul(x = cache_v0, y = var_136_cast_fp16)[name = tensor<string, []>("op_141_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_142_cast_fp16 = mul(x = v_new_1_cast_fp16, y = mask_1_cast_fp16)[name = tensor<string, []>("op_142_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_1 = add(x = var_141_cast_fp16, y = var_142_cast_fp16)[name = tensor<string, []>("new_v_1_cast_fp16")];
tensor<bool, [512]> var_144_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position0)[name = tensor<string, []>("op_144_cast_fp16")];
tensor<int32, [4]> var_146 = const()[name = tensor<string, []>("op_146"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_145_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_145_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_144_cast_fp16_to_fp16 = cast(dtype = var_145_to_fp16_dtype_0, x = var_144_cast_fp16)[name = tensor<string, []>("cast_185")];
tensor<fp16, [1, 1, 1, 512]> var_147_cast_fp16 = reshape(shape = var_146, x = var_144_cast_fp16_to_fp16)[name = tensor<string, []>("op_147_cast_fp16")];
tensor<int32, [4]> var_151 = const()[name = tensor<string, []>("op_151"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_154_transpose_x_0 = const()[name = tensor<string, []>("op_154_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_154_transpose_y_0 = const()[name = tensor<string, []>("op_154_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_72_perm_0 = const()[name = tensor<string, []>("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_73_perm_0 = const()[name = tensor<string, []>("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_73 = transpose(perm = transpose_73_perm_0, x = new_k_1)[name = tensor<string, []>("transpose_237")];
tensor<fp16, [1, 12, 1, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = q_1_cast_fp16)[name = tensor<string, []>("transpose_238")];
tensor<fp16, [1, 12, 1, 512]> var_154_cast_fp16 = matmul(transpose_x = var_154_transpose_x_0, transpose_y = var_154_transpose_y_0, x = transpose_72, y = transpose_73)[name = tensor<string, []>("op_154_cast_fp16")];
tensor<fp16, []> var_155_to_fp16 = const()[name = tensor<string, []>("op_155_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_1_cast_fp16 = mul(x = var_154_cast_fp16, y = var_155_to_fp16)[name = tensor<string, []>("attn_1_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_157_cast_fp16 = sub(x = var_92_to_fp16, y = var_147_cast_fp16)[name = tensor<string, []>("op_157_cast_fp16")];
tensor<fp16, []> var_158_to_fp16 = const()[name = tensor<string, []>("op_158_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_159_cast_fp16 = mul(x = var_157_cast_fp16, y = var_158_to_fp16)[name = tensor<string, []>("op_159_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_5_cast_fp16 = add(x = attn_1_cast_fp16, y = var_159_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_3_cast_fp16 = softmax(axis = var_94, x = input_5_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
tensor<bool, []> out_1_transpose_x_0 = const()[name = tensor<string, []>("out_1_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_1_transpose_y_0 = const()[name = tensor<string, []>("out_1_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_1_cast_fp16 = transpose(perm = var_151, x = new_v_1)[name = tensor<string, []>("transpose_239")];
tensor<fp16, [1, 12, 1, 64]> out_1_cast_fp16 = matmul(transpose_x = out_1_transpose_x_0, transpose_y = out_1_transpose_y_0, x = attn_3_cast_fp16, y = v4_1_cast_fp16)[name = tensor<string, []>("out_1_cast_fp16")];
tensor<int32, [4]> var_163_perm_0 = const()[name = tensor<string, []>("op_163_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_164 = const()[name = tensor<string, []>("op_164"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_163_cast_fp16 = transpose(perm = var_163_perm_0, x = out_1_cast_fp16)[name = tensor<string, []>("transpose_236")];
tensor<fp16, [1, 1, 768]> input_7_cast_fp16 = reshape(shape = var_164, x = var_163_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
tensor<fp16, [768, 768]> layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6692224)))];
tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7871936)))];
tensor<fp16, [1, 1, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
tensor<fp16, []> var_168_to_fp16 = const()[name = tensor<string, []>("op_168_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_169 = add(x = position0, y = var_168_to_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
tensor<int32, [1]> x_3_axes_0 = const()[name = tensor<string, []>("x_3_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_0_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7873536)))];
tensor<fp16, [1, 1, 768]> x_3_cast_fp16 = layer_norm(axes = x_3_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_xa_query_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
tensor<int32, [1]> memory_1_axes_0 = const()[name = tensor<string, []>("memory_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_0_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7875136)))];
tensor<fp16, [1, 256, 768]> memory_1_cast_fp16 = layer_norm(axes = memory_1_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_1_cast_fp16")];
tensor<fp16, [128, 768]> layers_0_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7876736)))];
tensor<fp16, [128]> linear_2_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_2_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8073408)))];
tensor<fp16, [1, 1, 128]> linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_0_cross_attn_q_proj_weight_to_fp16, x = x_3_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
tensor<int32, [4]> var_190 = const()[name = tensor<string, []>("op_190"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_191_cast_fp16 = reshape(shape = var_190, x = linear_2_cast_fp16)[name = tensor<string, []>("op_191_cast_fp16")];
tensor<fp16, [256, 768]> layers_0_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8073728)))];
tensor<fp16, [256]> linear_3_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_3_bias_0_to_fp16"), val = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8467008)))];
tensor<fp16, [1, 256, 256]> linear_3_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_0_cross_attn_kv_proj_weight_to_fp16, x = memory_1_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
tensor<int32, [5]> var_195 = const()[name = tensor<string, []>("op_195"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_1_cast_fp16 = reshape(shape = var_195, x = linear_3_cast_fp16)[name = tensor<string, []>("kv_1_cast_fp16")];
tensor<int32, [5]> var_199_begin_0 = const()[name = tensor<string, []>("op_199_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_199_end_0 = const()[name = tensor<string, []>("op_199_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_199_end_mask_0 = const()[name = tensor<string, []>("op_199_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_199_squeeze_mask_0 = const()[name = tensor<string, []>("op_199_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, squeeze_mask = var_199_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor<string, []>("op_199_cast_fp16")];
tensor<int32, [5]> var_203_begin_0 = const()[name = tensor<string, []>("op_203_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_203_end_0 = const()[name = tensor<string, []>("op_203_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_203_end_mask_0 = const()[name = tensor<string, []>("op_203_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_203_squeeze_mask_0 = const()[name = tensor<string, []>("op_203_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, squeeze_mask = var_203_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor<string, []>("op_203_cast_fp16")];
tensor<int32, [4]> v_3_perm_0 = const()[name = tensor<string, []>("v_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_206_transpose_x_0 = const()[name = tensor<string, []>("op_206_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_206_transpose_y_0 = const()[name = tensor<string, []>("op_206_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_74_perm_0 = const()[name = tensor<string, []>("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_75_perm_0 = const()[name = tensor<string, []>("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_75 = transpose(perm = transpose_75_perm_0, x = var_199_cast_fp16)[name = tensor<string, []>("transpose_233")];
tensor<fp16, [1, 1, 1, 128]> transpose_74 = transpose(perm = transpose_74_perm_0, x = var_191_cast_fp16)[name = tensor<string, []>("transpose_234")];
tensor<fp16, [1, 1, 1, 256]> var_206_cast_fp16 = matmul(transpose_x = var_206_transpose_x_0, transpose_y = var_206_transpose_y_0, x = transpose_74, y = transpose_75)[name = tensor<string, []>("op_206_cast_fp16")];
tensor<fp16, []> var_207_to_fp16 = const()[name = tensor<string, []>("op_207_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_5_cast_fp16 = mul(x = var_206_cast_fp16, y = var_207_to_fp16)[name = tensor<string, []>("attn_5_cast_fp16")];
tensor<int32, [1]> var_210_axes_0 = const()[name = tensor<string, []>("op_210_axes_0"), val = tensor<int32, [1]>([1])];
tensor<string, []> var_209_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_209_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [1, 256]> encoder_mask_to_fp16 = cast(dtype = var_209_to_fp16_dtype_0, x = cast_190)[name = tensor<string, []>("cast_184")];
tensor<fp16, [1, 1, 256]> var_210_cast_fp16 = expand_dims(axes = var_210_axes_0, x = encoder_mask_to_fp16)[name = tensor<string, []>("op_210_cast_fp16")];
tensor<int32, [1]> var_211_axes_0 = const()[name = tensor<string, []>("op_211_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 1, 1, 256]> var_211_cast_fp16 = expand_dims(axes = var_211_axes_0, x = var_210_cast_fp16)[name = tensor<string, []>("op_211_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> var_212_cast_fp16 = sub(x = var_92_to_fp16, y = var_211_cast_fp16)[name = tensor<string, []>("op_212_cast_fp16")];
tensor<fp16, []> var_213_to_fp16 = const()[name = tensor<string, []>("op_213_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 256]> var_214_cast_fp16 = mul(x = var_212_cast_fp16, y = var_213_to_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_11_cast_fp16 = add(x = attn_5_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_7_cast_fp16 = softmax(axis = var_94, x = input_11_cast_fp16)[name = tensor<string, []>("attn_7_cast_fp16")];
tensor<bool, []> out_3_transpose_x_0 = const()[name = tensor<string, []>("out_3_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_3_transpose_y_0 = const()[name = tensor<string, []>("out_3_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_3_cast_fp16 = transpose(perm = v_3_perm_0, x = var_203_cast_fp16)[name = tensor<string, []>("transpose_235")];
tensor<fp16, [1, 1, 1, 128]> out_3_cast_fp16 = matmul(transpose_x = out_3_transpose_x_0, transpose_y = out_3_transpose_y_0, x = attn_7_cast_fp16, y = v_3_cast_fp16)[name = tensor<string, []>("out_3_cast_fp16")];
tensor<int32, [4]> var_218_perm_0 = const()[name = tensor<string, []>("op_218_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_219 = const()[name = tensor<string, []>("op_219"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_218_cast_fp16 = transpose(perm = var_218_perm_0, x = out_3_cast_fp16)[name = tensor<string, []>("transpose_232")];
tensor<fp16, [1, 1, 128]> input_13_cast_fp16 = reshape(shape = var_219, x = var_218_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
tensor<fp16, [768, 128]> layers_0_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8467584)))];
tensor<fp16, [1, 1, 768]> linear_4_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_cross_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_4_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
tensor<int32, [1]> x_5_axes_0 = const()[name = tensor<string, []>("x_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_0_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8664256)))];
tensor<fp16, [1, 1, 768]> x_5_cast_fp16 = layer_norm(axes = x_5_axes_0, epsilon = var_97_to_fp16, gamma = layers_0_norm_ff_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
tensor<int32, [3]> input_17_perm_0 = const()[name = tensor<string, []>("input_17_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_0_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8665856)))];
tensor<fp16, [1, 768, 1]> input_17_cast_fp16 = transpose(perm = input_17_perm_0, x = x_5_cast_fp16)[name = tensor<string, []>("transpose_231")];
tensor<fp16, [1, 3072, 1]> input_19_cast_fp16 = conv(dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = layers_0_ffn_conv1_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
tensor<string, []> x_7_pad_type_0 = const()[name = tensor<string, []>("x_7_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_7_strides_0 = const()[name = tensor<string, []>("x_7_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_7_pad_0 = const()[name = tensor<string, []>("x_7_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_7_dilations_0 = const()[name = tensor<string, []>("x_7_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_7_groups_0 = const()[name = tensor<string, []>("x_7_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_0_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_0_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13384512)))];
tensor<fp16, [1, 768, 1]> x_7_cast_fp16 = conv(dilations = x_7_dilations_0, groups = x_7_groups_0, pad = x_7_pad_0, pad_type = x_7_pad_type_0, strides = x_7_strides_0, weight = layers_0_ffn_conv2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
tensor<int32, [3]> x_9_perm_0 = const()[name = tensor<string, []>("x_9_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_9_cast_fp16 = transpose(perm = x_9_perm_0, x = x_7_cast_fp16)[name = tensor<string, []>("transpose_230")];
tensor<fp16, [1, 1, 768]> input_23_cast_fp16 = add(x = input_15_cast_fp16, y = x_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
tensor<int32, []> var_264 = const()[name = tensor<string, []>("op_264"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_11_axes_0 = const()[name = tensor<string, []>("x_11_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_1_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18103168)))];
tensor<fp16, []> var_267_to_fp16 = const()[name = tensor<string, []>("op_267_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_11_cast_fp16 = layer_norm(axes = x_11_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_sa_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
tensor<fp16, [2304, 768]> layers_1_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18104768)))];
tensor<fp16, [1, 1, 2304]> linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_qkv_proj_weight_to_fp16, x = x_11_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
tensor<int32, [5]> var_286 = const()[name = tensor<string, []>("op_286"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_7_cast_fp16 = reshape(shape = var_286, x = linear_5_cast_fp16)[name = tensor<string, []>("qkv_7_cast_fp16")];
tensor<int32, [5]> q_5_begin_0 = const()[name = tensor<string, []>("q_5_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_5_end_0 = const()[name = tensor<string, []>("q_5_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_5_end_mask_0 = const()[name = tensor<string, []>("q_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_5_squeeze_mask_0 = const()[name = tensor<string, []>("q_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("q_5_cast_fp16")];
tensor<int32, [5]> k_5_begin_0 = const()[name = tensor<string, []>("k_5_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_5_end_0 = const()[name = tensor<string, []>("k_5_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_5_end_mask_0 = const()[name = tensor<string, []>("k_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_5_squeeze_mask_0 = const()[name = tensor<string, []>("k_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_5_cast_fp16 = slice_by_index(begin = k_5_begin_0, end = k_5_end_0, end_mask = k_5_end_mask_0, squeeze_mask = k_5_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
tensor<int32, [5]> v_5_begin_0 = const()[name = tensor<string, []>("v_5_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_5_end_0 = const()[name = tensor<string, []>("v_5_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_5_end_mask_0 = const()[name = tensor<string, []>("v_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_5_squeeze_mask_0 = const()[name = tensor<string, []>("v_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_5_cast_fp16 = slice_by_index(begin = v_5_begin_0, end = v_5_end_0, end_mask = v_5_end_mask_0, squeeze_mask = v_5_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("v_5_cast_fp16")];
tensor<bool, [512]> var_298_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position1)[name = tensor<string, []>("op_298_cast_fp16")];
tensor<int32, [4]> var_300 = const()[name = tensor<string, []>("op_300"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_299_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_299_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_298_cast_fp16_to_fp16 = cast(dtype = var_299_to_fp16_dtype_0, x = var_298_cast_fp16)[name = tensor<string, []>("cast_183")];
tensor<fp16, [1, 512, 1, 1]> mask_3_cast_fp16 = reshape(shape = var_300, x = var_298_cast_fp16_to_fp16)[name = tensor<string, []>("mask_3_cast_fp16")];
tensor<int32, [4]> k_new_3_reps_0 = const()[name = tensor<string, []>("k_new_3_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_3_cast_fp16 = tile(reps = k_new_3_reps_0, x = k_5_cast_fp16)[name = tensor<string, []>("k_new_3_cast_fp16")];
tensor<int32, [4]> v_new_3_reps_0 = const()[name = tensor<string, []>("v_new_3_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_3_cast_fp16 = tile(reps = v_new_3_reps_0, x = v_5_cast_fp16)[name = tensor<string, []>("v_new_3_cast_fp16")];
tensor<fp16, []> var_262_to_fp16 = const()[name = tensor<string, []>("op_262_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_306_cast_fp16 = sub(x = var_262_to_fp16, y = mask_3_cast_fp16)[name = tensor<string, []>("op_306_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_307_cast_fp16 = mul(x = cache_k1, y = var_306_cast_fp16)[name = tensor<string, []>("op_307_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_308_cast_fp16 = mul(x = k_new_3_cast_fp16, y = mask_3_cast_fp16)[name = tensor<string, []>("op_308_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_3 = add(x = var_307_cast_fp16, y = var_308_cast_fp16)[name = tensor<string, []>("new_k_3_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_311_cast_fp16 = mul(x = cache_v1, y = var_306_cast_fp16)[name = tensor<string, []>("op_311_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_312_cast_fp16 = mul(x = v_new_3_cast_fp16, y = mask_3_cast_fp16)[name = tensor<string, []>("op_312_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_3 = add(x = var_311_cast_fp16, y = var_312_cast_fp16)[name = tensor<string, []>("new_v_3_cast_fp16")];
tensor<bool, [512]> var_314_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position1)[name = tensor<string, []>("op_314_cast_fp16")];
tensor<int32, [4]> var_316 = const()[name = tensor<string, []>("op_316"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_315_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_315_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_314_cast_fp16_to_fp16 = cast(dtype = var_315_to_fp16_dtype_0, x = var_314_cast_fp16)[name = tensor<string, []>("cast_182")];
tensor<fp16, [1, 1, 1, 512]> var_317_cast_fp16 = reshape(shape = var_316, x = var_314_cast_fp16_to_fp16)[name = tensor<string, []>("op_317_cast_fp16")];
tensor<int32, [4]> var_321 = const()[name = tensor<string, []>("op_321"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_324_transpose_x_0 = const()[name = tensor<string, []>("op_324_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_324_transpose_y_0 = const()[name = tensor<string, []>("op_324_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_76_perm_0 = const()[name = tensor<string, []>("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_77_perm_0 = const()[name = tensor<string, []>("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_77 = transpose(perm = transpose_77_perm_0, x = new_k_3)[name = tensor<string, []>("transpose_227")];
tensor<fp16, [1, 12, 1, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = q_5_cast_fp16)[name = tensor<string, []>("transpose_228")];
tensor<fp16, [1, 12, 1, 512]> var_324_cast_fp16 = matmul(transpose_x = var_324_transpose_x_0, transpose_y = var_324_transpose_y_0, x = transpose_76, y = transpose_77)[name = tensor<string, []>("op_324_cast_fp16")];
tensor<fp16, []> var_325_to_fp16 = const()[name = tensor<string, []>("op_325_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_9_cast_fp16 = mul(x = var_324_cast_fp16, y = var_325_to_fp16)[name = tensor<string, []>("attn_9_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_327_cast_fp16 = sub(x = var_262_to_fp16, y = var_317_cast_fp16)[name = tensor<string, []>("op_327_cast_fp16")];
tensor<fp16, []> var_328_to_fp16 = const()[name = tensor<string, []>("op_328_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_329_cast_fp16 = mul(x = var_327_cast_fp16, y = var_328_to_fp16)[name = tensor<string, []>("op_329_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_25_cast_fp16 = add(x = attn_9_cast_fp16, y = var_329_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_11_cast_fp16 = softmax(axis = var_264, x = input_25_cast_fp16)[name = tensor<string, []>("attn_11_cast_fp16")];
tensor<bool, []> out_5_transpose_x_0 = const()[name = tensor<string, []>("out_5_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_5_transpose_y_0 = const()[name = tensor<string, []>("out_5_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_3_cast_fp16 = transpose(perm = var_321, x = new_v_3)[name = tensor<string, []>("transpose_229")];
tensor<fp16, [1, 12, 1, 64]> out_5_cast_fp16 = matmul(transpose_x = out_5_transpose_x_0, transpose_y = out_5_transpose_y_0, x = attn_11_cast_fp16, y = v4_3_cast_fp16)[name = tensor<string, []>("out_5_cast_fp16")];
tensor<int32, [4]> var_333_perm_0 = const()[name = tensor<string, []>("op_333_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_334 = const()[name = tensor<string, []>("op_334"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_333_cast_fp16 = transpose(perm = var_333_perm_0, x = out_5_cast_fp16)[name = tensor<string, []>("transpose_226")];
tensor<fp16, [1, 1, 768]> input_27_cast_fp16 = reshape(shape = var_334, x = var_333_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
tensor<fp16, [768, 768]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21643776)))];
tensor<fp16, [1, 1, 768]> linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
tensor<fp16, []> var_338_to_fp16 = const()[name = tensor<string, []>("op_338_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_339 = add(x = position1, y = var_338_to_fp16)[name = tensor<string, []>("op_339_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_6_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
tensor<int32, [1]> x_13_axes_0 = const()[name = tensor<string, []>("x_13_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_1_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22823488)))];
tensor<fp16, [1, 1, 768]> x_13_cast_fp16 = layer_norm(axes = x_13_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_xa_query_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")];
tensor<int32, [1]> memory_3_axes_0 = const()[name = tensor<string, []>("memory_3_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_1_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22825088)))];
tensor<fp16, [1, 256, 768]> memory_3_cast_fp16 = layer_norm(axes = memory_3_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_3_cast_fp16")];
tensor<fp16, [128, 768]> layers_1_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22826688)))];
tensor<fp16, [1, 1, 128]> linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_1_cross_attn_q_proj_weight_to_fp16, x = x_13_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
tensor<int32, [4]> var_360 = const()[name = tensor<string, []>("op_360"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_361_cast_fp16 = reshape(shape = var_360, x = linear_7_cast_fp16)[name = tensor<string, []>("op_361_cast_fp16")];
tensor<fp16, [256, 768]> layers_1_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23023360)))];
tensor<fp16, [1, 256, 256]> linear_8_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_1_cross_attn_kv_proj_weight_to_fp16, x = memory_3_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
tensor<int32, [5]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_3_cast_fp16 = reshape(shape = var_365, x = linear_8_cast_fp16)[name = tensor<string, []>("kv_3_cast_fp16")];
tensor<int32, [5]> var_369_begin_0 = const()[name = tensor<string, []>("op_369_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_369_end_0 = const()[name = tensor<string, []>("op_369_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_369_end_mask_0 = const()[name = tensor<string, []>("op_369_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_369_squeeze_mask_0 = const()[name = tensor<string, []>("op_369_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_369_cast_fp16 = slice_by_index(begin = var_369_begin_0, end = var_369_end_0, end_mask = var_369_end_mask_0, squeeze_mask = var_369_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor<string, []>("op_369_cast_fp16")];
tensor<int32, [5]> var_373_begin_0 = const()[name = tensor<string, []>("op_373_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_373_end_0 = const()[name = tensor<string, []>("op_373_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_373_end_mask_0 = const()[name = tensor<string, []>("op_373_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_373_squeeze_mask_0 = const()[name = tensor<string, []>("op_373_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, squeeze_mask = var_373_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor<string, []>("op_373_cast_fp16")];
tensor<int32, [4]> v_7_perm_0 = const()[name = tensor<string, []>("v_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_376_transpose_x_0 = const()[name = tensor<string, []>("op_376_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_376_transpose_y_0 = const()[name = tensor<string, []>("op_376_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_78_perm_0 = const()[name = tensor<string, []>("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_79_perm_0 = const()[name = tensor<string, []>("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_79 = transpose(perm = transpose_79_perm_0, x = var_369_cast_fp16)[name = tensor<string, []>("transpose_223")];
tensor<fp16, [1, 1, 1, 128]> transpose_78 = transpose(perm = transpose_78_perm_0, x = var_361_cast_fp16)[name = tensor<string, []>("transpose_224")];
tensor<fp16, [1, 1, 1, 256]> var_376_cast_fp16 = matmul(transpose_x = var_376_transpose_x_0, transpose_y = var_376_transpose_y_0, x = transpose_78, y = transpose_79)[name = tensor<string, []>("op_376_cast_fp16")];
tensor<fp16, []> var_377_to_fp16 = const()[name = tensor<string, []>("op_377_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_13_cast_fp16 = mul(x = var_376_cast_fp16, y = var_377_to_fp16)[name = tensor<string, []>("attn_13_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_31_cast_fp16 = add(x = attn_13_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_15_cast_fp16 = softmax(axis = var_264, x = input_31_cast_fp16)[name = tensor<string, []>("attn_15_cast_fp16")];
tensor<bool, []> out_7_transpose_x_0 = const()[name = tensor<string, []>("out_7_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_7_transpose_y_0 = const()[name = tensor<string, []>("out_7_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_7_cast_fp16 = transpose(perm = v_7_perm_0, x = var_373_cast_fp16)[name = tensor<string, []>("transpose_225")];
tensor<fp16, [1, 1, 1, 128]> out_7_cast_fp16 = matmul(transpose_x = out_7_transpose_x_0, transpose_y = out_7_transpose_y_0, x = attn_15_cast_fp16, y = v_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
tensor<int32, [4]> var_388_perm_0 = const()[name = tensor<string, []>("op_388_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_389 = const()[name = tensor<string, []>("op_389"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_388_cast_fp16 = transpose(perm = var_388_perm_0, x = out_7_cast_fp16)[name = tensor<string, []>("transpose_222")];
tensor<fp16, [1, 1, 128]> input_33_cast_fp16 = reshape(shape = var_389, x = var_388_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
tensor<fp16, [768, 128]> layers_1_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23416640)))];
tensor<fp16, [1, 1, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_cross_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_35_cast_fp16 = add(x = input_29_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
tensor<int32, [1]> x_15_axes_0 = const()[name = tensor<string, []>("x_15_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_1_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23613312)))];
tensor<fp16, [1, 1, 768]> x_15_cast_fp16 = layer_norm(axes = x_15_axes_0, epsilon = var_267_to_fp16, gamma = layers_1_norm_ff_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
tensor<int32, [3]> input_37_perm_0 = const()[name = tensor<string, []>("input_37_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_1_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23614912)))];
tensor<fp16, [1, 768, 1]> input_37_cast_fp16 = transpose(perm = input_37_perm_0, x = x_15_cast_fp16)[name = tensor<string, []>("transpose_221")];
tensor<fp16, [1, 3072, 1]> input_39_cast_fp16 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = layers_1_ffn_conv1_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
tensor<string, []> x_17_pad_type_0 = const()[name = tensor<string, []>("x_17_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_17_strides_0 = const()[name = tensor<string, []>("x_17_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_17_pad_0 = const()[name = tensor<string, []>("x_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_17_dilations_0 = const()[name = tensor<string, []>("x_17_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_17_groups_0 = const()[name = tensor<string, []>("x_17_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_1_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28333568)))];
tensor<fp16, [1, 768, 1]> x_17_cast_fp16 = conv(dilations = x_17_dilations_0, groups = x_17_groups_0, pad = x_17_pad_0, pad_type = x_17_pad_type_0, strides = x_17_strides_0, weight = layers_1_ffn_conv2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
tensor<int32, [3]> x_19_perm_0 = const()[name = tensor<string, []>("x_19_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_19_cast_fp16 = transpose(perm = x_19_perm_0, x = x_17_cast_fp16)[name = tensor<string, []>("transpose_220")];
tensor<fp16, [1, 1, 768]> input_43_cast_fp16 = add(x = input_35_cast_fp16, y = x_19_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
tensor<int32, []> var_434 = const()[name = tensor<string, []>("op_434"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_21_axes_0 = const()[name = tensor<string, []>("x_21_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_2_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33052224)))];
tensor<fp16, []> var_437_to_fp16 = const()[name = tensor<string, []>("op_437_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_21_cast_fp16 = layer_norm(axes = x_21_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_sa_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("x_21_cast_fp16")];
tensor<fp16, [2304, 768]> layers_2_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33053824)))];
tensor<fp16, [1, 1, 2304]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_qkv_proj_weight_to_fp16, x = x_21_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
tensor<int32, [5]> var_456 = const()[name = tensor<string, []>("op_456"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_11_cast_fp16 = reshape(shape = var_456, x = linear_10_cast_fp16)[name = tensor<string, []>("qkv_11_cast_fp16")];
tensor<int32, [5]> q_9_begin_0 = const()[name = tensor<string, []>("q_9_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_9_end_0 = const()[name = tensor<string, []>("q_9_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_9_end_mask_0 = const()[name = tensor<string, []>("q_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_9_squeeze_mask_0 = const()[name = tensor<string, []>("q_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")];
tensor<int32, [5]> k_9_begin_0 = const()[name = tensor<string, []>("k_9_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_9_end_0 = const()[name = tensor<string, []>("k_9_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_9_end_mask_0 = const()[name = tensor<string, []>("k_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_9_squeeze_mask_0 = const()[name = tensor<string, []>("k_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_9_cast_fp16 = slice_by_index(begin = k_9_begin_0, end = k_9_end_0, end_mask = k_9_end_mask_0, squeeze_mask = k_9_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
tensor<int32, [5]> v_9_begin_0 = const()[name = tensor<string, []>("v_9_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_9_end_0 = const()[name = tensor<string, []>("v_9_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_9_end_mask_0 = const()[name = tensor<string, []>("v_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_9_squeeze_mask_0 = const()[name = tensor<string, []>("v_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_9_cast_fp16 = slice_by_index(begin = v_9_begin_0, end = v_9_end_0, end_mask = v_9_end_mask_0, squeeze_mask = v_9_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("v_9_cast_fp16")];
tensor<bool, [512]> var_468_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position2)[name = tensor<string, []>("op_468_cast_fp16")];
tensor<int32, [4]> var_470 = const()[name = tensor<string, []>("op_470"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_469_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_469_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_468_cast_fp16_to_fp16 = cast(dtype = var_469_to_fp16_dtype_0, x = var_468_cast_fp16)[name = tensor<string, []>("cast_181")];
tensor<fp16, [1, 512, 1, 1]> mask_5_cast_fp16 = reshape(shape = var_470, x = var_468_cast_fp16_to_fp16)[name = tensor<string, []>("mask_5_cast_fp16")];
tensor<int32, [4]> k_new_5_reps_0 = const()[name = tensor<string, []>("k_new_5_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_5_cast_fp16 = tile(reps = k_new_5_reps_0, x = k_9_cast_fp16)[name = tensor<string, []>("k_new_5_cast_fp16")];
tensor<int32, [4]> v_new_5_reps_0 = const()[name = tensor<string, []>("v_new_5_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_5_cast_fp16 = tile(reps = v_new_5_reps_0, x = v_9_cast_fp16)[name = tensor<string, []>("v_new_5_cast_fp16")];
tensor<fp16, []> var_432_to_fp16 = const()[name = tensor<string, []>("op_432_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_476_cast_fp16 = sub(x = var_432_to_fp16, y = mask_5_cast_fp16)[name = tensor<string, []>("op_476_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_477_cast_fp16 = mul(x = cache_k2, y = var_476_cast_fp16)[name = tensor<string, []>("op_477_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_478_cast_fp16 = mul(x = k_new_5_cast_fp16, y = mask_5_cast_fp16)[name = tensor<string, []>("op_478_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_5 = add(x = var_477_cast_fp16, y = var_478_cast_fp16)[name = tensor<string, []>("new_k_5_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_481_cast_fp16 = mul(x = cache_v2, y = var_476_cast_fp16)[name = tensor<string, []>("op_481_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_482_cast_fp16 = mul(x = v_new_5_cast_fp16, y = mask_5_cast_fp16)[name = tensor<string, []>("op_482_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_5 = add(x = var_481_cast_fp16, y = var_482_cast_fp16)[name = tensor<string, []>("new_v_5_cast_fp16")];
tensor<bool, [512]> var_484_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position2)[name = tensor<string, []>("op_484_cast_fp16")];
tensor<int32, [4]> var_486 = const()[name = tensor<string, []>("op_486"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_485_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_485_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_484_cast_fp16_to_fp16 = cast(dtype = var_485_to_fp16_dtype_0, x = var_484_cast_fp16)[name = tensor<string, []>("cast_180")];
tensor<fp16, [1, 1, 1, 512]> var_487_cast_fp16 = reshape(shape = var_486, x = var_484_cast_fp16_to_fp16)[name = tensor<string, []>("op_487_cast_fp16")];
tensor<int32, [4]> var_491 = const()[name = tensor<string, []>("op_491"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_494_transpose_x_0 = const()[name = tensor<string, []>("op_494_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_494_transpose_y_0 = const()[name = tensor<string, []>("op_494_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_80_perm_0 = const()[name = tensor<string, []>("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_81_perm_0 = const()[name = tensor<string, []>("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_81 = transpose(perm = transpose_81_perm_0, x = new_k_5)[name = tensor<string, []>("transpose_217")];
tensor<fp16, [1, 12, 1, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = q_9_cast_fp16)[name = tensor<string, []>("transpose_218")];
tensor<fp16, [1, 12, 1, 512]> var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_0, transpose_y = var_494_transpose_y_0, x = transpose_80, y = transpose_81)[name = tensor<string, []>("op_494_cast_fp16")];
tensor<fp16, []> var_495_to_fp16 = const()[name = tensor<string, []>("op_495_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_17_cast_fp16 = mul(x = var_494_cast_fp16, y = var_495_to_fp16)[name = tensor<string, []>("attn_17_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_497_cast_fp16 = sub(x = var_432_to_fp16, y = var_487_cast_fp16)[name = tensor<string, []>("op_497_cast_fp16")];
tensor<fp16, []> var_498_to_fp16 = const()[name = tensor<string, []>("op_498_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_499_cast_fp16 = mul(x = var_497_cast_fp16, y = var_498_to_fp16)[name = tensor<string, []>("op_499_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_45_cast_fp16 = add(x = attn_17_cast_fp16, y = var_499_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_19_cast_fp16 = softmax(axis = var_434, x = input_45_cast_fp16)[name = tensor<string, []>("attn_19_cast_fp16")];
tensor<bool, []> out_9_transpose_x_0 = const()[name = tensor<string, []>("out_9_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_9_transpose_y_0 = const()[name = tensor<string, []>("out_9_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_5_cast_fp16 = transpose(perm = var_491, x = new_v_5)[name = tensor<string, []>("transpose_219")];
tensor<fp16, [1, 12, 1, 64]> out_9_cast_fp16 = matmul(transpose_x = out_9_transpose_x_0, transpose_y = out_9_transpose_y_0, x = attn_19_cast_fp16, y = v4_5_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
tensor<int32, [4]> var_503_perm_0 = const()[name = tensor<string, []>("op_503_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_504 = const()[name = tensor<string, []>("op_504"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_503_cast_fp16 = transpose(perm = var_503_perm_0, x = out_9_cast_fp16)[name = tensor<string, []>("transpose_216")];
tensor<fp16, [1, 1, 768]> input_47_cast_fp16 = reshape(shape = var_504, x = var_503_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
tensor<fp16, [768, 768]> layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36592832)))];
tensor<fp16, [1, 1, 768]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
tensor<fp16, []> var_508_to_fp16 = const()[name = tensor<string, []>("op_508_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_509 = add(x = position2, y = var_508_to_fp16)[name = tensor<string, []>("op_509_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_49_cast_fp16 = add(x = input_43_cast_fp16, y = linear_11_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
tensor<int32, [1]> x_23_axes_0 = const()[name = tensor<string, []>("x_23_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_2_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37772544)))];
tensor<fp16, [1, 1, 768]> x_23_cast_fp16 = layer_norm(axes = x_23_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_xa_query_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")];
tensor<int32, [1]> memory_5_axes_0 = const()[name = tensor<string, []>("memory_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_2_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37774144)))];
tensor<fp16, [1, 256, 768]> memory_5_cast_fp16 = layer_norm(axes = memory_5_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_5_cast_fp16")];
tensor<fp16, [128, 768]> layers_2_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37775744)))];
tensor<fp16, [1, 1, 128]> linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_2_cross_attn_q_proj_weight_to_fp16, x = x_23_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
tensor<int32, [4]> var_530 = const()[name = tensor<string, []>("op_530"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_531_cast_fp16 = reshape(shape = var_530, x = linear_12_cast_fp16)[name = tensor<string, []>("op_531_cast_fp16")];
tensor<fp16, [256, 768]> layers_2_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37972416)))];
tensor<fp16, [1, 256, 256]> linear_13_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_2_cross_attn_kv_proj_weight_to_fp16, x = memory_5_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
tensor<int32, [5]> var_535 = const()[name = tensor<string, []>("op_535"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_5_cast_fp16 = reshape(shape = var_535, x = linear_13_cast_fp16)[name = tensor<string, []>("kv_5_cast_fp16")];
tensor<int32, [5]> var_539_begin_0 = const()[name = tensor<string, []>("op_539_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_539_end_0 = const()[name = tensor<string, []>("op_539_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_539_end_mask_0 = const()[name = tensor<string, []>("op_539_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_539_squeeze_mask_0 = const()[name = tensor<string, []>("op_539_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_539_cast_fp16 = slice_by_index(begin = var_539_begin_0, end = var_539_end_0, end_mask = var_539_end_mask_0, squeeze_mask = var_539_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor<string, []>("op_539_cast_fp16")];
tensor<int32, [5]> var_543_begin_0 = const()[name = tensor<string, []>("op_543_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_543_end_0 = const()[name = tensor<string, []>("op_543_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_543_end_mask_0 = const()[name = tensor<string, []>("op_543_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_543_squeeze_mask_0 = const()[name = tensor<string, []>("op_543_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_543_cast_fp16 = slice_by_index(begin = var_543_begin_0, end = var_543_end_0, end_mask = var_543_end_mask_0, squeeze_mask = var_543_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor<string, []>("op_543_cast_fp16")];
tensor<int32, [4]> v_11_perm_0 = const()[name = tensor<string, []>("v_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_546_transpose_x_0 = const()[name = tensor<string, []>("op_546_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_546_transpose_y_0 = const()[name = tensor<string, []>("op_546_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_82_perm_0 = const()[name = tensor<string, []>("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_83_perm_0 = const()[name = tensor<string, []>("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_83 = transpose(perm = transpose_83_perm_0, x = var_539_cast_fp16)[name = tensor<string, []>("transpose_213")];
tensor<fp16, [1, 1, 1, 128]> transpose_82 = transpose(perm = transpose_82_perm_0, x = var_531_cast_fp16)[name = tensor<string, []>("transpose_214")];
tensor<fp16, [1, 1, 1, 256]> var_546_cast_fp16 = matmul(transpose_x = var_546_transpose_x_0, transpose_y = var_546_transpose_y_0, x = transpose_82, y = transpose_83)[name = tensor<string, []>("op_546_cast_fp16")];
tensor<fp16, []> var_547_to_fp16 = const()[name = tensor<string, []>("op_547_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_21_cast_fp16 = mul(x = var_546_cast_fp16, y = var_547_to_fp16)[name = tensor<string, []>("attn_21_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_51_cast_fp16 = add(x = attn_21_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_23_cast_fp16 = softmax(axis = var_434, x = input_51_cast_fp16)[name = tensor<string, []>("attn_23_cast_fp16")];
tensor<bool, []> out_11_transpose_x_0 = const()[name = tensor<string, []>("out_11_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_11_transpose_y_0 = const()[name = tensor<string, []>("out_11_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_11_cast_fp16 = transpose(perm = v_11_perm_0, x = var_543_cast_fp16)[name = tensor<string, []>("transpose_215")];
tensor<fp16, [1, 1, 1, 128]> out_11_cast_fp16 = matmul(transpose_x = out_11_transpose_x_0, transpose_y = out_11_transpose_y_0, x = attn_23_cast_fp16, y = v_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
tensor<int32, [4]> var_558_perm_0 = const()[name = tensor<string, []>("op_558_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_559 = const()[name = tensor<string, []>("op_559"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_558_cast_fp16 = transpose(perm = var_558_perm_0, x = out_11_cast_fp16)[name = tensor<string, []>("transpose_212")];
tensor<fp16, [1, 1, 128]> input_53_cast_fp16 = reshape(shape = var_559, x = var_558_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
tensor<fp16, [768, 128]> layers_2_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38365696)))];
tensor<fp16, [1, 1, 768]> linear_14_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_cross_attn_o_proj_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_55_cast_fp16 = add(x = input_49_cast_fp16, y = linear_14_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
tensor<int32, [1]> x_25_axes_0 = const()[name = tensor<string, []>("x_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_2_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38562368)))];
tensor<fp16, [1, 1, 768]> x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_437_to_fp16, gamma = layers_2_norm_ff_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
tensor<int32, [3]> input_57_perm_0 = const()[name = tensor<string, []>("input_57_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_2_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38563968)))];
tensor<fp16, [1, 768, 1]> input_57_cast_fp16 = transpose(perm = input_57_perm_0, x = x_25_cast_fp16)[name = tensor<string, []>("transpose_211")];
tensor<fp16, [1, 3072, 1]> input_59_cast_fp16 = conv(dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = layers_2_ffn_conv1_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
tensor<string, []> x_27_pad_type_0 = const()[name = tensor<string, []>("x_27_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_27_strides_0 = const()[name = tensor<string, []>("x_27_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_27_pad_0 = const()[name = tensor<string, []>("x_27_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_27_dilations_0 = const()[name = tensor<string, []>("x_27_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_27_groups_0 = const()[name = tensor<string, []>("x_27_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_2_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_2_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43282624)))];
tensor<fp16, [1, 768, 1]> x_27_cast_fp16 = conv(dilations = x_27_dilations_0, groups = x_27_groups_0, pad = x_27_pad_0, pad_type = x_27_pad_type_0, strides = x_27_strides_0, weight = layers_2_ffn_conv2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("x_27_cast_fp16")];
tensor<int32, [3]> x_29_perm_0 = const()[name = tensor<string, []>("x_29_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_29_cast_fp16 = transpose(perm = x_29_perm_0, x = x_27_cast_fp16)[name = tensor<string, []>("transpose_210")];
tensor<fp16, [1, 1, 768]> input_63_cast_fp16 = add(x = input_55_cast_fp16, y = x_29_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
tensor<int32, []> var_604 = const()[name = tensor<string, []>("op_604"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_31_axes_0 = const()[name = tensor<string, []>("x_31_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_3_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48001280)))];
tensor<fp16, []> var_607_to_fp16 = const()[name = tensor<string, []>("op_607_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_31_cast_fp16 = layer_norm(axes = x_31_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_sa_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
tensor<fp16, [2304, 768]> layers_3_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48002880)))];
tensor<fp16, [1, 1, 2304]> linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_qkv_proj_weight_to_fp16, x = x_31_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
tensor<int32, [5]> var_626 = const()[name = tensor<string, []>("op_626"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_15_cast_fp16 = reshape(shape = var_626, x = linear_15_cast_fp16)[name = tensor<string, []>("qkv_15_cast_fp16")];
tensor<int32, [5]> q_13_begin_0 = const()[name = tensor<string, []>("q_13_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_13_end_0 = const()[name = tensor<string, []>("q_13_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_13_end_mask_0 = const()[name = tensor<string, []>("q_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_13_squeeze_mask_0 = const()[name = tensor<string, []>("q_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("q_13_cast_fp16")];
tensor<int32, [5]> k_13_begin_0 = const()[name = tensor<string, []>("k_13_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_13_end_0 = const()[name = tensor<string, []>("k_13_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_13_end_mask_0 = const()[name = tensor<string, []>("k_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_13_squeeze_mask_0 = const()[name = tensor<string, []>("k_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_13_cast_fp16 = slice_by_index(begin = k_13_begin_0, end = k_13_end_0, end_mask = k_13_end_mask_0, squeeze_mask = k_13_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
tensor<int32, [5]> v_13_begin_0 = const()[name = tensor<string, []>("v_13_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_13_end_0 = const()[name = tensor<string, []>("v_13_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_13_end_mask_0 = const()[name = tensor<string, []>("v_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_13_squeeze_mask_0 = const()[name = tensor<string, []>("v_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_13_cast_fp16 = slice_by_index(begin = v_13_begin_0, end = v_13_end_0, end_mask = v_13_end_mask_0, squeeze_mask = v_13_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("v_13_cast_fp16")];
tensor<bool, [512]> var_638_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position3)[name = tensor<string, []>("op_638_cast_fp16")];
tensor<int32, [4]> var_640 = const()[name = tensor<string, []>("op_640"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_639_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_639_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_638_cast_fp16_to_fp16 = cast(dtype = var_639_to_fp16_dtype_0, x = var_638_cast_fp16)[name = tensor<string, []>("cast_179")];
tensor<fp16, [1, 512, 1, 1]> mask_7_cast_fp16 = reshape(shape = var_640, x = var_638_cast_fp16_to_fp16)[name = tensor<string, []>("mask_7_cast_fp16")];
tensor<int32, [4]> k_new_7_reps_0 = const()[name = tensor<string, []>("k_new_7_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_7_cast_fp16 = tile(reps = k_new_7_reps_0, x = k_13_cast_fp16)[name = tensor<string, []>("k_new_7_cast_fp16")];
tensor<int32, [4]> v_new_7_reps_0 = const()[name = tensor<string, []>("v_new_7_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_7_cast_fp16 = tile(reps = v_new_7_reps_0, x = v_13_cast_fp16)[name = tensor<string, []>("v_new_7_cast_fp16")];
tensor<fp16, []> var_602_to_fp16 = const()[name = tensor<string, []>("op_602_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_646_cast_fp16 = sub(x = var_602_to_fp16, y = mask_7_cast_fp16)[name = tensor<string, []>("op_646_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_647_cast_fp16 = mul(x = cache_k3, y = var_646_cast_fp16)[name = tensor<string, []>("op_647_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_648_cast_fp16 = mul(x = k_new_7_cast_fp16, y = mask_7_cast_fp16)[name = tensor<string, []>("op_648_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_7 = add(x = var_647_cast_fp16, y = var_648_cast_fp16)[name = tensor<string, []>("new_k_7_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_651_cast_fp16 = mul(x = cache_v3, y = var_646_cast_fp16)[name = tensor<string, []>("op_651_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_652_cast_fp16 = mul(x = v_new_7_cast_fp16, y = mask_7_cast_fp16)[name = tensor<string, []>("op_652_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_7 = add(x = var_651_cast_fp16, y = var_652_cast_fp16)[name = tensor<string, []>("new_v_7_cast_fp16")];
tensor<bool, [512]> var_654_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position3)[name = tensor<string, []>("op_654_cast_fp16")];
tensor<int32, [4]> var_656 = const()[name = tensor<string, []>("op_656"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_655_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_655_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_654_cast_fp16_to_fp16 = cast(dtype = var_655_to_fp16_dtype_0, x = var_654_cast_fp16)[name = tensor<string, []>("cast_178")];
tensor<fp16, [1, 1, 1, 512]> var_657_cast_fp16 = reshape(shape = var_656, x = var_654_cast_fp16_to_fp16)[name = tensor<string, []>("op_657_cast_fp16")];
tensor<int32, [4]> var_661 = const()[name = tensor<string, []>("op_661"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_664_transpose_x_0 = const()[name = tensor<string, []>("op_664_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_664_transpose_y_0 = const()[name = tensor<string, []>("op_664_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_84_perm_0 = const()[name = tensor<string, []>("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_85_perm_0 = const()[name = tensor<string, []>("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_85 = transpose(perm = transpose_85_perm_0, x = new_k_7)[name = tensor<string, []>("transpose_207")];
tensor<fp16, [1, 12, 1, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = q_13_cast_fp16)[name = tensor<string, []>("transpose_208")];
tensor<fp16, [1, 12, 1, 512]> var_664_cast_fp16 = matmul(transpose_x = var_664_transpose_x_0, transpose_y = var_664_transpose_y_0, x = transpose_84, y = transpose_85)[name = tensor<string, []>("op_664_cast_fp16")];
tensor<fp16, []> var_665_to_fp16 = const()[name = tensor<string, []>("op_665_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_25_cast_fp16 = mul(x = var_664_cast_fp16, y = var_665_to_fp16)[name = tensor<string, []>("attn_25_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_667_cast_fp16 = sub(x = var_602_to_fp16, y = var_657_cast_fp16)[name = tensor<string, []>("op_667_cast_fp16")];
tensor<fp16, []> var_668_to_fp16 = const()[name = tensor<string, []>("op_668_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_669_cast_fp16 = mul(x = var_667_cast_fp16, y = var_668_to_fp16)[name = tensor<string, []>("op_669_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_65_cast_fp16 = add(x = attn_25_cast_fp16, y = var_669_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_27_cast_fp16 = softmax(axis = var_604, x = input_65_cast_fp16)[name = tensor<string, []>("attn_27_cast_fp16")];
tensor<bool, []> out_13_transpose_x_0 = const()[name = tensor<string, []>("out_13_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_13_transpose_y_0 = const()[name = tensor<string, []>("out_13_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_7_cast_fp16 = transpose(perm = var_661, x = new_v_7)[name = tensor<string, []>("transpose_209")];
tensor<fp16, [1, 12, 1, 64]> out_13_cast_fp16 = matmul(transpose_x = out_13_transpose_x_0, transpose_y = out_13_transpose_y_0, x = attn_27_cast_fp16, y = v4_7_cast_fp16)[name = tensor<string, []>("out_13_cast_fp16")];
tensor<int32, [4]> var_673_perm_0 = const()[name = tensor<string, []>("op_673_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_674 = const()[name = tensor<string, []>("op_674"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_673_cast_fp16 = transpose(perm = var_673_perm_0, x = out_13_cast_fp16)[name = tensor<string, []>("transpose_206")];
tensor<fp16, [1, 1, 768]> input_67_cast_fp16 = reshape(shape = var_674, x = var_673_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
tensor<fp16, [768, 768]> layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51541888)))];
tensor<fp16, [1, 1, 768]> linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
tensor<fp16, []> var_678_to_fp16 = const()[name = tensor<string, []>("op_678_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_679 = add(x = position3, y = var_678_to_fp16)[name = tensor<string, []>("op_679_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_69_cast_fp16 = add(x = input_63_cast_fp16, y = linear_16_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
tensor<int32, [1]> x_33_axes_0 = const()[name = tensor<string, []>("x_33_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_3_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52721600)))];
tensor<fp16, [1, 1, 768]> x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_xa_query_weight_to_fp16, x = input_69_cast_fp16)[name = tensor<string, []>("x_33_cast_fp16")];
tensor<int32, [1]> memory_7_axes_0 = const()[name = tensor<string, []>("memory_7_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_3_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52723200)))];
tensor<fp16, [1, 256, 768]> memory_7_cast_fp16 = layer_norm(axes = memory_7_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_7_cast_fp16")];
tensor<fp16, [128, 768]> layers_3_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52724800)))];
tensor<fp16, [1, 1, 128]> linear_17_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_3_cross_attn_q_proj_weight_to_fp16, x = x_33_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
tensor<int32, [4]> var_700 = const()[name = tensor<string, []>("op_700"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_701_cast_fp16 = reshape(shape = var_700, x = linear_17_cast_fp16)[name = tensor<string, []>("op_701_cast_fp16")];
tensor<fp16, [256, 768]> layers_3_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52921472)))];
tensor<fp16, [1, 256, 256]> linear_18_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_3_cross_attn_kv_proj_weight_to_fp16, x = memory_7_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
tensor<int32, [5]> var_705 = const()[name = tensor<string, []>("op_705"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_7_cast_fp16 = reshape(shape = var_705, x = linear_18_cast_fp16)[name = tensor<string, []>("kv_7_cast_fp16")];
tensor<int32, [5]> var_709_begin_0 = const()[name = tensor<string, []>("op_709_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_709_end_0 = const()[name = tensor<string, []>("op_709_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_709_end_mask_0 = const()[name = tensor<string, []>("op_709_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_709_squeeze_mask_0 = const()[name = tensor<string, []>("op_709_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_709_cast_fp16 = slice_by_index(begin = var_709_begin_0, end = var_709_end_0, end_mask = var_709_end_mask_0, squeeze_mask = var_709_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor<string, []>("op_709_cast_fp16")];
tensor<int32, [5]> var_713_begin_0 = const()[name = tensor<string, []>("op_713_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_713_end_0 = const()[name = tensor<string, []>("op_713_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_713_end_mask_0 = const()[name = tensor<string, []>("op_713_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_713_squeeze_mask_0 = const()[name = tensor<string, []>("op_713_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_713_cast_fp16 = slice_by_index(begin = var_713_begin_0, end = var_713_end_0, end_mask = var_713_end_mask_0, squeeze_mask = var_713_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor<string, []>("op_713_cast_fp16")];
tensor<int32, [4]> v_15_perm_0 = const()[name = tensor<string, []>("v_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_716_transpose_x_0 = const()[name = tensor<string, []>("op_716_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_716_transpose_y_0 = const()[name = tensor<string, []>("op_716_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_86_perm_0 = const()[name = tensor<string, []>("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_87_perm_0 = const()[name = tensor<string, []>("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_87 = transpose(perm = transpose_87_perm_0, x = var_709_cast_fp16)[name = tensor<string, []>("transpose_203")];
tensor<fp16, [1, 1, 1, 128]> transpose_86 = transpose(perm = transpose_86_perm_0, x = var_701_cast_fp16)[name = tensor<string, []>("transpose_204")];
tensor<fp16, [1, 1, 1, 256]> var_716_cast_fp16 = matmul(transpose_x = var_716_transpose_x_0, transpose_y = var_716_transpose_y_0, x = transpose_86, y = transpose_87)[name = tensor<string, []>("op_716_cast_fp16")];
tensor<fp16, []> var_717_to_fp16 = const()[name = tensor<string, []>("op_717_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_29_cast_fp16 = mul(x = var_716_cast_fp16, y = var_717_to_fp16)[name = tensor<string, []>("attn_29_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_71_cast_fp16 = add(x = attn_29_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_31_cast_fp16 = softmax(axis = var_604, x = input_71_cast_fp16)[name = tensor<string, []>("attn_31_cast_fp16")];
tensor<bool, []> out_15_transpose_x_0 = const()[name = tensor<string, []>("out_15_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_15_transpose_y_0 = const()[name = tensor<string, []>("out_15_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_15_cast_fp16 = transpose(perm = v_15_perm_0, x = var_713_cast_fp16)[name = tensor<string, []>("transpose_205")];
tensor<fp16, [1, 1, 1, 128]> out_15_cast_fp16 = matmul(transpose_x = out_15_transpose_x_0, transpose_y = out_15_transpose_y_0, x = attn_31_cast_fp16, y = v_15_cast_fp16)[name = tensor<string, []>("out_15_cast_fp16")];
tensor<int32, [4]> var_728_perm_0 = const()[name = tensor<string, []>("op_728_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_729 = const()[name = tensor<string, []>("op_729"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_728_cast_fp16 = transpose(perm = var_728_perm_0, x = out_15_cast_fp16)[name = tensor<string, []>("transpose_202")];
tensor<fp16, [1, 1, 128]> input_73_cast_fp16 = reshape(shape = var_729, x = var_728_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
tensor<fp16, [768, 128]> layers_3_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53314752)))];
tensor<fp16, [1, 1, 768]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_cross_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_75_cast_fp16 = add(x = input_69_cast_fp16, y = linear_19_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
tensor<int32, [1]> x_35_axes_0 = const()[name = tensor<string, []>("x_35_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_3_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53511424)))];
tensor<fp16, [1, 1, 768]> x_35_cast_fp16 = layer_norm(axes = x_35_axes_0, epsilon = var_607_to_fp16, gamma = layers_3_norm_ff_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("x_35_cast_fp16")];
tensor<int32, [3]> input_77_perm_0 = const()[name = tensor<string, []>("input_77_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_3_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53513024)))];
tensor<fp16, [1, 768, 1]> input_77_cast_fp16 = transpose(perm = input_77_perm_0, x = x_35_cast_fp16)[name = tensor<string, []>("transpose_201")];
tensor<fp16, [1, 3072, 1]> input_79_cast_fp16 = conv(dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = layers_3_ffn_conv1_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
tensor<string, []> x_37_pad_type_0 = const()[name = tensor<string, []>("x_37_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_37_strides_0 = const()[name = tensor<string, []>("x_37_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_37_pad_0 = const()[name = tensor<string, []>("x_37_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_37_dilations_0 = const()[name = tensor<string, []>("x_37_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_37_groups_0 = const()[name = tensor<string, []>("x_37_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_3_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_3_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58231680)))];
tensor<fp16, [1, 768, 1]> x_37_cast_fp16 = conv(dilations = x_37_dilations_0, groups = x_37_groups_0, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = x_37_strides_0, weight = layers_3_ffn_conv2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("x_37_cast_fp16")];
tensor<int32, [3]> x_39_perm_0 = const()[name = tensor<string, []>("x_39_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_39_cast_fp16 = transpose(perm = x_39_perm_0, x = x_37_cast_fp16)[name = tensor<string, []>("transpose_200")];
tensor<fp16, [1, 1, 768]> input_83_cast_fp16 = add(x = input_75_cast_fp16, y = x_39_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
tensor<int32, []> var_774 = const()[name = tensor<string, []>("op_774"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_41_axes_0 = const()[name = tensor<string, []>("x_41_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_4_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62950336)))];
tensor<fp16, []> var_777_to_fp16 = const()[name = tensor<string, []>("op_777_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_sa_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("x_41_cast_fp16")];
tensor<fp16, [2304, 768]> layers_4_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62951936)))];
tensor<fp16, [1, 1, 2304]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_qkv_proj_weight_to_fp16, x = x_41_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
tensor<int32, [5]> var_796 = const()[name = tensor<string, []>("op_796"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_19_cast_fp16 = reshape(shape = var_796, x = linear_20_cast_fp16)[name = tensor<string, []>("qkv_19_cast_fp16")];
tensor<int32, [5]> q_17_begin_0 = const()[name = tensor<string, []>("q_17_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_17_end_0 = const()[name = tensor<string, []>("q_17_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_17_end_mask_0 = const()[name = tensor<string, []>("q_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_17_squeeze_mask_0 = const()[name = tensor<string, []>("q_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("q_17_cast_fp16")];
tensor<int32, [5]> k_17_begin_0 = const()[name = tensor<string, []>("k_17_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_17_end_0 = const()[name = tensor<string, []>("k_17_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_17_end_mask_0 = const()[name = tensor<string, []>("k_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_17_squeeze_mask_0 = const()[name = tensor<string, []>("k_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_17_cast_fp16 = slice_by_index(begin = k_17_begin_0, end = k_17_end_0, end_mask = k_17_end_mask_0, squeeze_mask = k_17_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
tensor<int32, [5]> v_17_begin_0 = const()[name = tensor<string, []>("v_17_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_17_end_0 = const()[name = tensor<string, []>("v_17_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_17_end_mask_0 = const()[name = tensor<string, []>("v_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_17_squeeze_mask_0 = const()[name = tensor<string, []>("v_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_17_cast_fp16 = slice_by_index(begin = v_17_begin_0, end = v_17_end_0, end_mask = v_17_end_mask_0, squeeze_mask = v_17_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("v_17_cast_fp16")];
tensor<bool, [512]> var_808_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position4)[name = tensor<string, []>("op_808_cast_fp16")];
tensor<int32, [4]> var_810 = const()[name = tensor<string, []>("op_810"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_809_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_809_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_808_cast_fp16_to_fp16 = cast(dtype = var_809_to_fp16_dtype_0, x = var_808_cast_fp16)[name = tensor<string, []>("cast_177")];
tensor<fp16, [1, 512, 1, 1]> mask_9_cast_fp16 = reshape(shape = var_810, x = var_808_cast_fp16_to_fp16)[name = tensor<string, []>("mask_9_cast_fp16")];
tensor<int32, [4]> k_new_9_reps_0 = const()[name = tensor<string, []>("k_new_9_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_9_cast_fp16 = tile(reps = k_new_9_reps_0, x = k_17_cast_fp16)[name = tensor<string, []>("k_new_9_cast_fp16")];
tensor<int32, [4]> v_new_9_reps_0 = const()[name = tensor<string, []>("v_new_9_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_9_cast_fp16 = tile(reps = v_new_9_reps_0, x = v_17_cast_fp16)[name = tensor<string, []>("v_new_9_cast_fp16")];
tensor<fp16, []> var_772_to_fp16 = const()[name = tensor<string, []>("op_772_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_816_cast_fp16 = sub(x = var_772_to_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_817_cast_fp16 = mul(x = cache_k4, y = var_816_cast_fp16)[name = tensor<string, []>("op_817_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_818_cast_fp16 = mul(x = k_new_9_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_9 = add(x = var_817_cast_fp16, y = var_818_cast_fp16)[name = tensor<string, []>("new_k_9_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_821_cast_fp16 = mul(x = cache_v4, y = var_816_cast_fp16)[name = tensor<string, []>("op_821_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_822_cast_fp16 = mul(x = v_new_9_cast_fp16, y = mask_9_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_9 = add(x = var_821_cast_fp16, y = var_822_cast_fp16)[name = tensor<string, []>("new_v_9_cast_fp16")];
tensor<bool, [512]> var_824_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position4)[name = tensor<string, []>("op_824_cast_fp16")];
tensor<int32, [4]> var_826 = const()[name = tensor<string, []>("op_826"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_825_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_825_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_824_cast_fp16_to_fp16 = cast(dtype = var_825_to_fp16_dtype_0, x = var_824_cast_fp16)[name = tensor<string, []>("cast_176")];
tensor<fp16, [1, 1, 1, 512]> var_827_cast_fp16 = reshape(shape = var_826, x = var_824_cast_fp16_to_fp16)[name = tensor<string, []>("op_827_cast_fp16")];
tensor<int32, [4]> var_831 = const()[name = tensor<string, []>("op_831"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_834_transpose_x_0 = const()[name = tensor<string, []>("op_834_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_834_transpose_y_0 = const()[name = tensor<string, []>("op_834_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_88_perm_0 = const()[name = tensor<string, []>("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_89_perm_0 = const()[name = tensor<string, []>("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_89 = transpose(perm = transpose_89_perm_0, x = new_k_9)[name = tensor<string, []>("transpose_197")];
tensor<fp16, [1, 12, 1, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = q_17_cast_fp16)[name = tensor<string, []>("transpose_198")];
tensor<fp16, [1, 12, 1, 512]> var_834_cast_fp16 = matmul(transpose_x = var_834_transpose_x_0, transpose_y = var_834_transpose_y_0, x = transpose_88, y = transpose_89)[name = tensor<string, []>("op_834_cast_fp16")];
tensor<fp16, []> var_835_to_fp16 = const()[name = tensor<string, []>("op_835_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_33_cast_fp16 = mul(x = var_834_cast_fp16, y = var_835_to_fp16)[name = tensor<string, []>("attn_33_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_837_cast_fp16 = sub(x = var_772_to_fp16, y = var_827_cast_fp16)[name = tensor<string, []>("op_837_cast_fp16")];
tensor<fp16, []> var_838_to_fp16 = const()[name = tensor<string, []>("op_838_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_839_cast_fp16 = mul(x = var_837_cast_fp16, y = var_838_to_fp16)[name = tensor<string, []>("op_839_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_85_cast_fp16 = add(x = attn_33_cast_fp16, y = var_839_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_35_cast_fp16 = softmax(axis = var_774, x = input_85_cast_fp16)[name = tensor<string, []>("attn_35_cast_fp16")];
tensor<bool, []> out_17_transpose_x_0 = const()[name = tensor<string, []>("out_17_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_17_transpose_y_0 = const()[name = tensor<string, []>("out_17_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_9_cast_fp16 = transpose(perm = var_831, x = new_v_9)[name = tensor<string, []>("transpose_199")];
tensor<fp16, [1, 12, 1, 64]> out_17_cast_fp16 = matmul(transpose_x = out_17_transpose_x_0, transpose_y = out_17_transpose_y_0, x = attn_35_cast_fp16, y = v4_9_cast_fp16)[name = tensor<string, []>("out_17_cast_fp16")];
tensor<int32, [4]> var_843_perm_0 = const()[name = tensor<string, []>("op_843_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_844 = const()[name = tensor<string, []>("op_844"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_843_cast_fp16 = transpose(perm = var_843_perm_0, x = out_17_cast_fp16)[name = tensor<string, []>("transpose_196")];
tensor<fp16, [1, 1, 768]> input_87_cast_fp16 = reshape(shape = var_844, x = var_843_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
tensor<fp16, [768, 768]> layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66490944)))];
tensor<fp16, [1, 1, 768]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
tensor<fp16, []> var_848_to_fp16 = const()[name = tensor<string, []>("op_848_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_849 = add(x = position4, y = var_848_to_fp16)[name = tensor<string, []>("op_849_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_89_cast_fp16 = add(x = input_83_cast_fp16, y = linear_21_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
tensor<int32, [1]> x_43_axes_0 = const()[name = tensor<string, []>("x_43_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_4_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67670656)))];
tensor<fp16, [1, 1, 768]> x_43_cast_fp16 = layer_norm(axes = x_43_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_xa_query_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("x_43_cast_fp16")];
tensor<int32, [1]> memory_9_axes_0 = const()[name = tensor<string, []>("memory_9_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_4_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67672256)))];
tensor<fp16, [1, 256, 768]> memory_9_cast_fp16 = layer_norm(axes = memory_9_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_9_cast_fp16")];
tensor<fp16, [128, 768]> layers_4_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67673856)))];
tensor<fp16, [1, 1, 128]> linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_4_cross_attn_q_proj_weight_to_fp16, x = x_43_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
tensor<int32, [4]> var_870 = const()[name = tensor<string, []>("op_870"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_871_cast_fp16 = reshape(shape = var_870, x = linear_22_cast_fp16)[name = tensor<string, []>("op_871_cast_fp16")];
tensor<fp16, [256, 768]> layers_4_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67870528)))];
tensor<fp16, [1, 256, 256]> linear_23_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_4_cross_attn_kv_proj_weight_to_fp16, x = memory_9_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
tensor<int32, [5]> var_875 = const()[name = tensor<string, []>("op_875"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_9_cast_fp16 = reshape(shape = var_875, x = linear_23_cast_fp16)[name = tensor<string, []>("kv_9_cast_fp16")];
tensor<int32, [5]> var_879_begin_0 = const()[name = tensor<string, []>("op_879_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_879_end_0 = const()[name = tensor<string, []>("op_879_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_879_end_mask_0 = const()[name = tensor<string, []>("op_879_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_879_squeeze_mask_0 = const()[name = tensor<string, []>("op_879_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_879_cast_fp16 = slice_by_index(begin = var_879_begin_0, end = var_879_end_0, end_mask = var_879_end_mask_0, squeeze_mask = var_879_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor<string, []>("op_879_cast_fp16")];
tensor<int32, [5]> var_883_begin_0 = const()[name = tensor<string, []>("op_883_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_883_end_0 = const()[name = tensor<string, []>("op_883_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_883_end_mask_0 = const()[name = tensor<string, []>("op_883_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_883_squeeze_mask_0 = const()[name = tensor<string, []>("op_883_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_883_cast_fp16 = slice_by_index(begin = var_883_begin_0, end = var_883_end_0, end_mask = var_883_end_mask_0, squeeze_mask = var_883_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor<string, []>("op_883_cast_fp16")];
tensor<int32, [4]> v_19_perm_0 = const()[name = tensor<string, []>("v_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_886_transpose_x_0 = const()[name = tensor<string, []>("op_886_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_886_transpose_y_0 = const()[name = tensor<string, []>("op_886_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_90_perm_0 = const()[name = tensor<string, []>("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_91_perm_0 = const()[name = tensor<string, []>("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_91 = transpose(perm = transpose_91_perm_0, x = var_879_cast_fp16)[name = tensor<string, []>("transpose_193")];
tensor<fp16, [1, 1, 1, 128]> transpose_90 = transpose(perm = transpose_90_perm_0, x = var_871_cast_fp16)[name = tensor<string, []>("transpose_194")];
tensor<fp16, [1, 1, 1, 256]> var_886_cast_fp16 = matmul(transpose_x = var_886_transpose_x_0, transpose_y = var_886_transpose_y_0, x = transpose_90, y = transpose_91)[name = tensor<string, []>("op_886_cast_fp16")];
tensor<fp16, []> var_887_to_fp16 = const()[name = tensor<string, []>("op_887_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_37_cast_fp16 = mul(x = var_886_cast_fp16, y = var_887_to_fp16)[name = tensor<string, []>("attn_37_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_91_cast_fp16 = add(x = attn_37_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_39_cast_fp16 = softmax(axis = var_774, x = input_91_cast_fp16)[name = tensor<string, []>("attn_39_cast_fp16")];
tensor<bool, []> out_19_transpose_x_0 = const()[name = tensor<string, []>("out_19_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_19_transpose_y_0 = const()[name = tensor<string, []>("out_19_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_19_cast_fp16 = transpose(perm = v_19_perm_0, x = var_883_cast_fp16)[name = tensor<string, []>("transpose_195")];
tensor<fp16, [1, 1, 1, 128]> out_19_cast_fp16 = matmul(transpose_x = out_19_transpose_x_0, transpose_y = out_19_transpose_y_0, x = attn_39_cast_fp16, y = v_19_cast_fp16)[name = tensor<string, []>("out_19_cast_fp16")];
tensor<int32, [4]> var_898_perm_0 = const()[name = tensor<string, []>("op_898_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_899 = const()[name = tensor<string, []>("op_899"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_898_cast_fp16 = transpose(perm = var_898_perm_0, x = out_19_cast_fp16)[name = tensor<string, []>("transpose_192")];
tensor<fp16, [1, 1, 128]> input_93_cast_fp16 = reshape(shape = var_899, x = var_898_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
tensor<fp16, [768, 128]> layers_4_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68263808)))];
tensor<fp16, [1, 1, 768]> linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_cross_attn_o_proj_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_95_cast_fp16 = add(x = input_89_cast_fp16, y = linear_24_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
tensor<int32, [1]> x_45_axes_0 = const()[name = tensor<string, []>("x_45_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_4_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68460480)))];
tensor<fp16, [1, 1, 768]> x_45_cast_fp16 = layer_norm(axes = x_45_axes_0, epsilon = var_777_to_fp16, gamma = layers_4_norm_ff_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("x_45_cast_fp16")];
tensor<int32, [3]> input_97_perm_0 = const()[name = tensor<string, []>("input_97_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_4_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68462080)))];
tensor<fp16, [1, 768, 1]> input_97_cast_fp16 = transpose(perm = input_97_perm_0, x = x_45_cast_fp16)[name = tensor<string, []>("transpose_191")];
tensor<fp16, [1, 3072, 1]> input_99_cast_fp16 = conv(dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = layers_4_ffn_conv1_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
tensor<string, []> x_47_pad_type_0 = const()[name = tensor<string, []>("x_47_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_47_strides_0 = const()[name = tensor<string, []>("x_47_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_47_pad_0 = const()[name = tensor<string, []>("x_47_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_47_dilations_0 = const()[name = tensor<string, []>("x_47_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_47_groups_0 = const()[name = tensor<string, []>("x_47_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_4_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_4_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73180736)))];
tensor<fp16, [1, 768, 1]> x_47_cast_fp16 = conv(dilations = x_47_dilations_0, groups = x_47_groups_0, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = x_47_strides_0, weight = layers_4_ffn_conv2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("x_47_cast_fp16")];
tensor<int32, [3]> x_49_perm_0 = const()[name = tensor<string, []>("x_49_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_49_cast_fp16 = transpose(perm = x_49_perm_0, x = x_47_cast_fp16)[name = tensor<string, []>("transpose_190")];
tensor<fp16, [1, 1, 768]> input_103_cast_fp16 = add(x = input_95_cast_fp16, y = x_49_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
tensor<int32, []> var_944 = const()[name = tensor<string, []>("op_944"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_51_axes_0 = const()[name = tensor<string, []>("x_51_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_5_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77899392)))];
tensor<fp16, []> var_947_to_fp16 = const()[name = tensor<string, []>("op_947_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_51_cast_fp16 = layer_norm(axes = x_51_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_sa_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("x_51_cast_fp16")];
tensor<fp16, [2304, 768]> layers_5_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77900992)))];
tensor<fp16, [1, 1, 2304]> linear_25_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_qkv_proj_weight_to_fp16, x = x_51_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
tensor<int32, [5]> var_966 = const()[name = tensor<string, []>("op_966"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_23_cast_fp16 = reshape(shape = var_966, x = linear_25_cast_fp16)[name = tensor<string, []>("qkv_23_cast_fp16")];
tensor<int32, [5]> q_21_begin_0 = const()[name = tensor<string, []>("q_21_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_21_end_0 = const()[name = tensor<string, []>("q_21_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_21_end_mask_0 = const()[name = tensor<string, []>("q_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_21_squeeze_mask_0 = const()[name = tensor<string, []>("q_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("q_21_cast_fp16")];
tensor<int32, [5]> k_21_begin_0 = const()[name = tensor<string, []>("k_21_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_21_end_0 = const()[name = tensor<string, []>("k_21_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_21_end_mask_0 = const()[name = tensor<string, []>("k_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_21_squeeze_mask_0 = const()[name = tensor<string, []>("k_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_21_cast_fp16 = slice_by_index(begin = k_21_begin_0, end = k_21_end_0, end_mask = k_21_end_mask_0, squeeze_mask = k_21_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
tensor<int32, [5]> v_21_begin_0 = const()[name = tensor<string, []>("v_21_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_21_end_0 = const()[name = tensor<string, []>("v_21_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_21_end_mask_0 = const()[name = tensor<string, []>("v_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_21_squeeze_mask_0 = const()[name = tensor<string, []>("v_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_21_cast_fp16 = slice_by_index(begin = v_21_begin_0, end = v_21_end_0, end_mask = v_21_end_mask_0, squeeze_mask = v_21_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("v_21_cast_fp16")];
tensor<bool, [512]> var_978_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position5)[name = tensor<string, []>("op_978_cast_fp16")];
tensor<int32, [4]> var_980 = const()[name = tensor<string, []>("op_980"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_979_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_979_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_978_cast_fp16_to_fp16 = cast(dtype = var_979_to_fp16_dtype_0, x = var_978_cast_fp16)[name = tensor<string, []>("cast_175")];
tensor<fp16, [1, 512, 1, 1]> mask_11_cast_fp16 = reshape(shape = var_980, x = var_978_cast_fp16_to_fp16)[name = tensor<string, []>("mask_11_cast_fp16")];
tensor<int32, [4]> k_new_11_reps_0 = const()[name = tensor<string, []>("k_new_11_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_11_cast_fp16 = tile(reps = k_new_11_reps_0, x = k_21_cast_fp16)[name = tensor<string, []>("k_new_11_cast_fp16")];
tensor<int32, [4]> v_new_11_reps_0 = const()[name = tensor<string, []>("v_new_11_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_11_cast_fp16 = tile(reps = v_new_11_reps_0, x = v_21_cast_fp16)[name = tensor<string, []>("v_new_11_cast_fp16")];
tensor<fp16, []> var_942_to_fp16 = const()[name = tensor<string, []>("op_942_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_986_cast_fp16 = sub(x = var_942_to_fp16, y = mask_11_cast_fp16)[name = tensor<string, []>("op_986_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_987_cast_fp16 = mul(x = cache_k5, y = var_986_cast_fp16)[name = tensor<string, []>("op_987_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_988_cast_fp16 = mul(x = k_new_11_cast_fp16, y = mask_11_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_11 = add(x = var_987_cast_fp16, y = var_988_cast_fp16)[name = tensor<string, []>("new_k_11_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_991_cast_fp16 = mul(x = cache_v5, y = var_986_cast_fp16)[name = tensor<string, []>("op_991_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_992_cast_fp16 = mul(x = v_new_11_cast_fp16, y = mask_11_cast_fp16)[name = tensor<string, []>("op_992_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_11 = add(x = var_991_cast_fp16, y = var_992_cast_fp16)[name = tensor<string, []>("new_v_11_cast_fp16")];
tensor<bool, [512]> var_994_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position5)[name = tensor<string, []>("op_994_cast_fp16")];
tensor<int32, [4]> var_996 = const()[name = tensor<string, []>("op_996"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_995_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_995_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_994_cast_fp16_to_fp16 = cast(dtype = var_995_to_fp16_dtype_0, x = var_994_cast_fp16)[name = tensor<string, []>("cast_174")];
tensor<fp16, [1, 1, 1, 512]> var_997_cast_fp16 = reshape(shape = var_996, x = var_994_cast_fp16_to_fp16)[name = tensor<string, []>("op_997_cast_fp16")];
tensor<int32, [4]> var_1001 = const()[name = tensor<string, []>("op_1001"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1004_transpose_x_0 = const()[name = tensor<string, []>("op_1004_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1004_transpose_y_0 = const()[name = tensor<string, []>("op_1004_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_92_perm_0 = const()[name = tensor<string, []>("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_93_perm_0 = const()[name = tensor<string, []>("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_93 = transpose(perm = transpose_93_perm_0, x = new_k_11)[name = tensor<string, []>("transpose_187")];
tensor<fp16, [1, 12, 1, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = q_21_cast_fp16)[name = tensor<string, []>("transpose_188")];
tensor<fp16, [1, 12, 1, 512]> var_1004_cast_fp16 = matmul(transpose_x = var_1004_transpose_x_0, transpose_y = var_1004_transpose_y_0, x = transpose_92, y = transpose_93)[name = tensor<string, []>("op_1004_cast_fp16")];
tensor<fp16, []> var_1005_to_fp16 = const()[name = tensor<string, []>("op_1005_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_41_cast_fp16 = mul(x = var_1004_cast_fp16, y = var_1005_to_fp16)[name = tensor<string, []>("attn_41_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_1007_cast_fp16 = sub(x = var_942_to_fp16, y = var_997_cast_fp16)[name = tensor<string, []>("op_1007_cast_fp16")];
tensor<fp16, []> var_1008_to_fp16 = const()[name = tensor<string, []>("op_1008_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_1009_cast_fp16 = mul(x = var_1007_cast_fp16, y = var_1008_to_fp16)[name = tensor<string, []>("op_1009_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_105_cast_fp16 = add(x = attn_41_cast_fp16, y = var_1009_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_43_cast_fp16 = softmax(axis = var_944, x = input_105_cast_fp16)[name = tensor<string, []>("attn_43_cast_fp16")];
tensor<bool, []> out_21_transpose_x_0 = const()[name = tensor<string, []>("out_21_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_21_transpose_y_0 = const()[name = tensor<string, []>("out_21_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_11_cast_fp16 = transpose(perm = var_1001, x = new_v_11)[name = tensor<string, []>("transpose_189")];
tensor<fp16, [1, 12, 1, 64]> out_21_cast_fp16 = matmul(transpose_x = out_21_transpose_x_0, transpose_y = out_21_transpose_y_0, x = attn_43_cast_fp16, y = v4_11_cast_fp16)[name = tensor<string, []>("out_21_cast_fp16")];
tensor<int32, [4]> var_1013_perm_0 = const()[name = tensor<string, []>("op_1013_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1014 = const()[name = tensor<string, []>("op_1014"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1013_cast_fp16 = transpose(perm = var_1013_perm_0, x = out_21_cast_fp16)[name = tensor<string, []>("transpose_186")];
tensor<fp16, [1, 1, 768]> input_107_cast_fp16 = reshape(shape = var_1014, x = var_1013_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
tensor<fp16, [768, 768]> layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81440000)))];
tensor<fp16, [1, 1, 768]> linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
tensor<fp16, []> var_1018_to_fp16 = const()[name = tensor<string, []>("op_1018_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_1019 = add(x = position5, y = var_1018_to_fp16)[name = tensor<string, []>("op_1019_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_109_cast_fp16 = add(x = input_103_cast_fp16, y = linear_26_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
tensor<int32, [1]> x_53_axes_0 = const()[name = tensor<string, []>("x_53_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_5_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82619712)))];
tensor<fp16, [1, 1, 768]> x_53_cast_fp16 = layer_norm(axes = x_53_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_xa_query_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("x_53_cast_fp16")];
tensor<int32, [1]> memory_11_axes_0 = const()[name = tensor<string, []>("memory_11_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_5_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82621312)))];
tensor<fp16, [1, 256, 768]> memory_11_cast_fp16 = layer_norm(axes = memory_11_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_11_cast_fp16")];
tensor<fp16, [128, 768]> layers_5_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82622912)))];
tensor<fp16, [1, 1, 128]> linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_5_cross_attn_q_proj_weight_to_fp16, x = x_53_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
tensor<int32, [4]> var_1040 = const()[name = tensor<string, []>("op_1040"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_1041_cast_fp16 = reshape(shape = var_1040, x = linear_27_cast_fp16)[name = tensor<string, []>("op_1041_cast_fp16")];
tensor<fp16, [256, 768]> layers_5_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82819584)))];
tensor<fp16, [1, 256, 256]> linear_28_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_5_cross_attn_kv_proj_weight_to_fp16, x = memory_11_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
tensor<int32, [5]> var_1045 = const()[name = tensor<string, []>("op_1045"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_11_cast_fp16 = reshape(shape = var_1045, x = linear_28_cast_fp16)[name = tensor<string, []>("kv_11_cast_fp16")];
tensor<int32, [5]> var_1049_begin_0 = const()[name = tensor<string, []>("op_1049_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_1049_end_0 = const()[name = tensor<string, []>("op_1049_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_1049_end_mask_0 = const()[name = tensor<string, []>("op_1049_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1049_squeeze_mask_0 = const()[name = tensor<string, []>("op_1049_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1049_cast_fp16 = slice_by_index(begin = var_1049_begin_0, end = var_1049_end_0, end_mask = var_1049_end_mask_0, squeeze_mask = var_1049_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor<string, []>("op_1049_cast_fp16")];
tensor<int32, [5]> var_1053_begin_0 = const()[name = tensor<string, []>("op_1053_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_1053_end_0 = const()[name = tensor<string, []>("op_1053_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_1053_end_mask_0 = const()[name = tensor<string, []>("op_1053_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1053_squeeze_mask_0 = const()[name = tensor<string, []>("op_1053_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1053_cast_fp16 = slice_by_index(begin = var_1053_begin_0, end = var_1053_end_0, end_mask = var_1053_end_mask_0, squeeze_mask = var_1053_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor<string, []>("op_1053_cast_fp16")];
tensor<int32, [4]> v_23_perm_0 = const()[name = tensor<string, []>("v_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1056_transpose_x_0 = const()[name = tensor<string, []>("op_1056_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1056_transpose_y_0 = const()[name = tensor<string, []>("op_1056_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_94_perm_0 = const()[name = tensor<string, []>("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_95_perm_0 = const()[name = tensor<string, []>("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_95 = transpose(perm = transpose_95_perm_0, x = var_1049_cast_fp16)[name = tensor<string, []>("transpose_183")];
tensor<fp16, [1, 1, 1, 128]> transpose_94 = transpose(perm = transpose_94_perm_0, x = var_1041_cast_fp16)[name = tensor<string, []>("transpose_184")];
tensor<fp16, [1, 1, 1, 256]> var_1056_cast_fp16 = matmul(transpose_x = var_1056_transpose_x_0, transpose_y = var_1056_transpose_y_0, x = transpose_94, y = transpose_95)[name = tensor<string, []>("op_1056_cast_fp16")];
tensor<fp16, []> var_1057_to_fp16 = const()[name = tensor<string, []>("op_1057_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_45_cast_fp16 = mul(x = var_1056_cast_fp16, y = var_1057_to_fp16)[name = tensor<string, []>("attn_45_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_111_cast_fp16 = add(x = attn_45_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_47_cast_fp16 = softmax(axis = var_944, x = input_111_cast_fp16)[name = tensor<string, []>("attn_47_cast_fp16")];
tensor<bool, []> out_23_transpose_x_0 = const()[name = tensor<string, []>("out_23_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_23_transpose_y_0 = const()[name = tensor<string, []>("out_23_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_23_cast_fp16 = transpose(perm = v_23_perm_0, x = var_1053_cast_fp16)[name = tensor<string, []>("transpose_185")];
tensor<fp16, [1, 1, 1, 128]> out_23_cast_fp16 = matmul(transpose_x = out_23_transpose_x_0, transpose_y = out_23_transpose_y_0, x = attn_47_cast_fp16, y = v_23_cast_fp16)[name = tensor<string, []>("out_23_cast_fp16")];
tensor<int32, [4]> var_1068_perm_0 = const()[name = tensor<string, []>("op_1068_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1069 = const()[name = tensor<string, []>("op_1069"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1068_cast_fp16 = transpose(perm = var_1068_perm_0, x = out_23_cast_fp16)[name = tensor<string, []>("transpose_182")];
tensor<fp16, [1, 1, 128]> input_113_cast_fp16 = reshape(shape = var_1069, x = var_1068_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
tensor<fp16, [768, 128]> layers_5_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83212864)))];
tensor<fp16, [1, 1, 768]> linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_cross_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_115_cast_fp16 = add(x = input_109_cast_fp16, y = linear_29_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
tensor<int32, [1]> x_55_axes_0 = const()[name = tensor<string, []>("x_55_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_5_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83409536)))];
tensor<fp16, [1, 1, 768]> x_55_cast_fp16 = layer_norm(axes = x_55_axes_0, epsilon = var_947_to_fp16, gamma = layers_5_norm_ff_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("x_55_cast_fp16")];
tensor<int32, [3]> input_117_perm_0 = const()[name = tensor<string, []>("input_117_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_5_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83411136)))];
tensor<fp16, [1, 768, 1]> input_117_cast_fp16 = transpose(perm = input_117_perm_0, x = x_55_cast_fp16)[name = tensor<string, []>("transpose_181")];
tensor<fp16, [1, 3072, 1]> input_119_cast_fp16 = conv(dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = layers_5_ffn_conv1_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
tensor<string, []> input_121_mode_0 = const()[name = tensor<string, []>("input_121_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
tensor<string, []> x_57_pad_type_0 = const()[name = tensor<string, []>("x_57_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_57_strides_0 = const()[name = tensor<string, []>("x_57_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_57_pad_0 = const()[name = tensor<string, []>("x_57_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_57_dilations_0 = const()[name = tensor<string, []>("x_57_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_57_groups_0 = const()[name = tensor<string, []>("x_57_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_5_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_5_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88129792)))];
tensor<fp16, [1, 768, 1]> x_57_cast_fp16 = conv(dilations = x_57_dilations_0, groups = x_57_groups_0, pad = x_57_pad_0, pad_type = x_57_pad_type_0, strides = x_57_strides_0, weight = layers_5_ffn_conv2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("x_57_cast_fp16")];
tensor<int32, [3]> x_59_perm_0 = const()[name = tensor<string, []>("x_59_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_59_cast_fp16 = transpose(perm = x_59_perm_0, x = x_57_cast_fp16)[name = tensor<string, []>("transpose_180")];
tensor<fp16, [1, 1, 768]> input_123_cast_fp16 = add(x = input_115_cast_fp16, y = x_59_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
tensor<int32, []> var_1114 = const()[name = tensor<string, []>("op_1114"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_61_axes_0 = const()[name = tensor<string, []>("x_61_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_6_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92848448)))];
tensor<fp16, []> var_1117_to_fp16 = const()[name = tensor<string, []>("op_1117_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_61_cast_fp16 = layer_norm(axes = x_61_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_sa_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("x_61_cast_fp16")];
tensor<fp16, [2304, 768]> layers_6_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92850048)))];
tensor<fp16, [1, 1, 2304]> linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_qkv_proj_weight_to_fp16, x = x_61_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
tensor<int32, [5]> var_1136 = const()[name = tensor<string, []>("op_1136"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_27_cast_fp16 = reshape(shape = var_1136, x = linear_30_cast_fp16)[name = tensor<string, []>("qkv_27_cast_fp16")];
tensor<int32, [5]> q_25_begin_0 = const()[name = tensor<string, []>("q_25_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_25_end_0 = const()[name = tensor<string, []>("q_25_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_25_end_mask_0 = const()[name = tensor<string, []>("q_25_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_25_squeeze_mask_0 = const()[name = tensor<string, []>("q_25_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_25_cast_fp16 = slice_by_index(begin = q_25_begin_0, end = q_25_end_0, end_mask = q_25_end_mask_0, squeeze_mask = q_25_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("q_25_cast_fp16")];
tensor<int32, [5]> k_25_begin_0 = const()[name = tensor<string, []>("k_25_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_25_end_0 = const()[name = tensor<string, []>("k_25_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_25_end_mask_0 = const()[name = tensor<string, []>("k_25_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_25_squeeze_mask_0 = const()[name = tensor<string, []>("k_25_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_25_cast_fp16 = slice_by_index(begin = k_25_begin_0, end = k_25_end_0, end_mask = k_25_end_mask_0, squeeze_mask = k_25_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("k_25_cast_fp16")];
tensor<int32, [5]> v_25_begin_0 = const()[name = tensor<string, []>("v_25_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_25_end_0 = const()[name = tensor<string, []>("v_25_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_25_end_mask_0 = const()[name = tensor<string, []>("v_25_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_25_squeeze_mask_0 = const()[name = tensor<string, []>("v_25_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_25_cast_fp16 = slice_by_index(begin = v_25_begin_0, end = v_25_end_0, end_mask = v_25_end_mask_0, squeeze_mask = v_25_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("v_25_cast_fp16")];
tensor<bool, [512]> var_1148_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position6)[name = tensor<string, []>("op_1148_cast_fp16")];
tensor<int32, [4]> var_1150 = const()[name = tensor<string, []>("op_1150"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_1149_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1149_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1148_cast_fp16_to_fp16 = cast(dtype = var_1149_to_fp16_dtype_0, x = var_1148_cast_fp16)[name = tensor<string, []>("cast_173")];
tensor<fp16, [1, 512, 1, 1]> mask_13_cast_fp16 = reshape(shape = var_1150, x = var_1148_cast_fp16_to_fp16)[name = tensor<string, []>("mask_13_cast_fp16")];
tensor<int32, [4]> k_new_13_reps_0 = const()[name = tensor<string, []>("k_new_13_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_13_cast_fp16 = tile(reps = k_new_13_reps_0, x = k_25_cast_fp16)[name = tensor<string, []>("k_new_13_cast_fp16")];
tensor<int32, [4]> v_new_13_reps_0 = const()[name = tensor<string, []>("v_new_13_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_13_cast_fp16 = tile(reps = v_new_13_reps_0, x = v_25_cast_fp16)[name = tensor<string, []>("v_new_13_cast_fp16")];
tensor<fp16, []> var_1112_to_fp16 = const()[name = tensor<string, []>("op_1112_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_1156_cast_fp16 = sub(x = var_1112_to_fp16, y = mask_13_cast_fp16)[name = tensor<string, []>("op_1156_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1157_cast_fp16 = mul(x = cache_k6, y = var_1156_cast_fp16)[name = tensor<string, []>("op_1157_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1158_cast_fp16 = mul(x = k_new_13_cast_fp16, y = mask_13_cast_fp16)[name = tensor<string, []>("op_1158_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_13 = add(x = var_1157_cast_fp16, y = var_1158_cast_fp16)[name = tensor<string, []>("new_k_13_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1161_cast_fp16 = mul(x = cache_v6, y = var_1156_cast_fp16)[name = tensor<string, []>("op_1161_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1162_cast_fp16 = mul(x = v_new_13_cast_fp16, y = mask_13_cast_fp16)[name = tensor<string, []>("op_1162_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_13 = add(x = var_1161_cast_fp16, y = var_1162_cast_fp16)[name = tensor<string, []>("new_v_13_cast_fp16")];
tensor<bool, [512]> var_1164_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position6)[name = tensor<string, []>("op_1164_cast_fp16")];
tensor<int32, [4]> var_1166 = const()[name = tensor<string, []>("op_1166"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_1165_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1165_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1164_cast_fp16_to_fp16 = cast(dtype = var_1165_to_fp16_dtype_0, x = var_1164_cast_fp16)[name = tensor<string, []>("cast_172")];
tensor<fp16, [1, 1, 1, 512]> var_1167_cast_fp16 = reshape(shape = var_1166, x = var_1164_cast_fp16_to_fp16)[name = tensor<string, []>("op_1167_cast_fp16")];
tensor<int32, [4]> var_1171 = const()[name = tensor<string, []>("op_1171"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1174_transpose_x_0 = const()[name = tensor<string, []>("op_1174_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1174_transpose_y_0 = const()[name = tensor<string, []>("op_1174_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_96_perm_0 = const()[name = tensor<string, []>("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_97_perm_0 = const()[name = tensor<string, []>("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_97 = transpose(perm = transpose_97_perm_0, x = new_k_13)[name = tensor<string, []>("transpose_177")];
tensor<fp16, [1, 12, 1, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = q_25_cast_fp16)[name = tensor<string, []>("transpose_178")];
tensor<fp16, [1, 12, 1, 512]> var_1174_cast_fp16 = matmul(transpose_x = var_1174_transpose_x_0, transpose_y = var_1174_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor<string, []>("op_1174_cast_fp16")];
tensor<fp16, []> var_1175_to_fp16 = const()[name = tensor<string, []>("op_1175_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_49_cast_fp16 = mul(x = var_1174_cast_fp16, y = var_1175_to_fp16)[name = tensor<string, []>("attn_49_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_1177_cast_fp16 = sub(x = var_1112_to_fp16, y = var_1167_cast_fp16)[name = tensor<string, []>("op_1177_cast_fp16")];
tensor<fp16, []> var_1178_to_fp16 = const()[name = tensor<string, []>("op_1178_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_1179_cast_fp16 = mul(x = var_1177_cast_fp16, y = var_1178_to_fp16)[name = tensor<string, []>("op_1179_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_125_cast_fp16 = add(x = attn_49_cast_fp16, y = var_1179_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_51_cast_fp16 = softmax(axis = var_1114, x = input_125_cast_fp16)[name = tensor<string, []>("attn_51_cast_fp16")];
tensor<bool, []> out_25_transpose_x_0 = const()[name = tensor<string, []>("out_25_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_25_transpose_y_0 = const()[name = tensor<string, []>("out_25_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_13_cast_fp16 = transpose(perm = var_1171, x = new_v_13)[name = tensor<string, []>("transpose_179")];
tensor<fp16, [1, 12, 1, 64]> out_25_cast_fp16 = matmul(transpose_x = out_25_transpose_x_0, transpose_y = out_25_transpose_y_0, x = attn_51_cast_fp16, y = v4_13_cast_fp16)[name = tensor<string, []>("out_25_cast_fp16")];
tensor<int32, [4]> var_1183_perm_0 = const()[name = tensor<string, []>("op_1183_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1184 = const()[name = tensor<string, []>("op_1184"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1183_cast_fp16 = transpose(perm = var_1183_perm_0, x = out_25_cast_fp16)[name = tensor<string, []>("transpose_176")];
tensor<fp16, [1, 1, 768]> input_127_cast_fp16 = reshape(shape = var_1184, x = var_1183_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
tensor<fp16, [768, 768]> layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96389056)))];
tensor<fp16, [1, 1, 768]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
tensor<fp16, []> var_1188_to_fp16 = const()[name = tensor<string, []>("op_1188_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_1189 = add(x = position6, y = var_1188_to_fp16)[name = tensor<string, []>("op_1189_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_129_cast_fp16 = add(x = input_123_cast_fp16, y = linear_31_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
tensor<int32, [1]> x_63_axes_0 = const()[name = tensor<string, []>("x_63_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_6_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97568768)))];
tensor<fp16, [1, 1, 768]> x_63_cast_fp16 = layer_norm(axes = x_63_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_xa_query_weight_to_fp16, x = input_129_cast_fp16)[name = tensor<string, []>("x_63_cast_fp16")];
tensor<int32, [1]> memory_13_axes_0 = const()[name = tensor<string, []>("memory_13_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_6_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97570368)))];
tensor<fp16, [1, 256, 768]> memory_13_cast_fp16 = layer_norm(axes = memory_13_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_13_cast_fp16")];
tensor<fp16, [128, 768]> layers_6_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97571968)))];
tensor<fp16, [1, 1, 128]> linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_6_cross_attn_q_proj_weight_to_fp16, x = x_63_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
tensor<int32, [4]> var_1210 = const()[name = tensor<string, []>("op_1210"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_1211_cast_fp16 = reshape(shape = var_1210, x = linear_32_cast_fp16)[name = tensor<string, []>("op_1211_cast_fp16")];
tensor<fp16, [256, 768]> layers_6_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97768640)))];
tensor<fp16, [1, 256, 256]> linear_33_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_6_cross_attn_kv_proj_weight_to_fp16, x = memory_13_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
tensor<int32, [5]> var_1215 = const()[name = tensor<string, []>("op_1215"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_13_cast_fp16 = reshape(shape = var_1215, x = linear_33_cast_fp16)[name = tensor<string, []>("kv_13_cast_fp16")];
tensor<int32, [5]> var_1219_begin_0 = const()[name = tensor<string, []>("op_1219_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_1219_end_0 = const()[name = tensor<string, []>("op_1219_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_1219_end_mask_0 = const()[name = tensor<string, []>("op_1219_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1219_squeeze_mask_0 = const()[name = tensor<string, []>("op_1219_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1219_cast_fp16 = slice_by_index(begin = var_1219_begin_0, end = var_1219_end_0, end_mask = var_1219_end_mask_0, squeeze_mask = var_1219_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor<string, []>("op_1219_cast_fp16")];
tensor<int32, [5]> var_1223_begin_0 = const()[name = tensor<string, []>("op_1223_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_1223_end_0 = const()[name = tensor<string, []>("op_1223_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_1223_end_mask_0 = const()[name = tensor<string, []>("op_1223_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1223_squeeze_mask_0 = const()[name = tensor<string, []>("op_1223_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1223_cast_fp16 = slice_by_index(begin = var_1223_begin_0, end = var_1223_end_0, end_mask = var_1223_end_mask_0, squeeze_mask = var_1223_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor<string, []>("op_1223_cast_fp16")];
tensor<int32, [4]> v_27_perm_0 = const()[name = tensor<string, []>("v_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1226_transpose_x_0 = const()[name = tensor<string, []>("op_1226_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1226_transpose_y_0 = const()[name = tensor<string, []>("op_1226_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_98_perm_0 = const()[name = tensor<string, []>("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_99_perm_0 = const()[name = tensor<string, []>("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_99 = transpose(perm = transpose_99_perm_0, x = var_1219_cast_fp16)[name = tensor<string, []>("transpose_173")];
tensor<fp16, [1, 1, 1, 128]> transpose_98 = transpose(perm = transpose_98_perm_0, x = var_1211_cast_fp16)[name = tensor<string, []>("transpose_174")];
tensor<fp16, [1, 1, 1, 256]> var_1226_cast_fp16 = matmul(transpose_x = var_1226_transpose_x_0, transpose_y = var_1226_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor<string, []>("op_1226_cast_fp16")];
tensor<fp16, []> var_1227_to_fp16 = const()[name = tensor<string, []>("op_1227_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_53_cast_fp16 = mul(x = var_1226_cast_fp16, y = var_1227_to_fp16)[name = tensor<string, []>("attn_53_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_131_cast_fp16 = add(x = attn_53_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_55_cast_fp16 = softmax(axis = var_1114, x = input_131_cast_fp16)[name = tensor<string, []>("attn_55_cast_fp16")];
tensor<bool, []> out_27_transpose_x_0 = const()[name = tensor<string, []>("out_27_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_27_transpose_y_0 = const()[name = tensor<string, []>("out_27_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_27_cast_fp16 = transpose(perm = v_27_perm_0, x = var_1223_cast_fp16)[name = tensor<string, []>("transpose_175")];
tensor<fp16, [1, 1, 1, 128]> out_27_cast_fp16 = matmul(transpose_x = out_27_transpose_x_0, transpose_y = out_27_transpose_y_0, x = attn_55_cast_fp16, y = v_27_cast_fp16)[name = tensor<string, []>("out_27_cast_fp16")];
tensor<int32, [4]> var_1238_perm_0 = const()[name = tensor<string, []>("op_1238_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1239 = const()[name = tensor<string, []>("op_1239"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1238_cast_fp16 = transpose(perm = var_1238_perm_0, x = out_27_cast_fp16)[name = tensor<string, []>("transpose_172")];
tensor<fp16, [1, 1, 128]> input_133_cast_fp16 = reshape(shape = var_1239, x = var_1238_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
tensor<fp16, [768, 128]> layers_6_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98161920)))];
tensor<fp16, [1, 1, 768]> linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_cross_attn_o_proj_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_34_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
tensor<int32, [1]> x_65_axes_0 = const()[name = tensor<string, []>("x_65_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_6_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98358592)))];
tensor<fp16, [1, 1, 768]> x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_1117_to_fp16, gamma = layers_6_norm_ff_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("x_65_cast_fp16")];
tensor<int32, [3]> input_137_perm_0 = const()[name = tensor<string, []>("input_137_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_139_pad_type_0 = const()[name = tensor<string, []>("input_139_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_139_strides_0 = const()[name = tensor<string, []>("input_139_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_139_pad_0 = const()[name = tensor<string, []>("input_139_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_139_dilations_0 = const()[name = tensor<string, []>("input_139_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_139_groups_0 = const()[name = tensor<string, []>("input_139_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_6_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98360192)))];
tensor<fp16, [1, 768, 1]> input_137_cast_fp16 = transpose(perm = input_137_perm_0, x = x_65_cast_fp16)[name = tensor<string, []>("transpose_171")];
tensor<fp16, [1, 3072, 1]> input_139_cast_fp16 = conv(dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = layers_6_ffn_conv1_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
tensor<string, []> x_67_pad_type_0 = const()[name = tensor<string, []>("x_67_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_67_strides_0 = const()[name = tensor<string, []>("x_67_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_67_pad_0 = const()[name = tensor<string, []>("x_67_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_67_dilations_0 = const()[name = tensor<string, []>("x_67_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_67_groups_0 = const()[name = tensor<string, []>("x_67_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_6_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_6_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103078848)))];
tensor<fp16, [1, 768, 1]> x_67_cast_fp16 = conv(dilations = x_67_dilations_0, groups = x_67_groups_0, pad = x_67_pad_0, pad_type = x_67_pad_type_0, strides = x_67_strides_0, weight = layers_6_ffn_conv2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("x_67_cast_fp16")];
tensor<int32, [3]> x_69_perm_0 = const()[name = tensor<string, []>("x_69_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_69_cast_fp16 = transpose(perm = x_69_perm_0, x = x_67_cast_fp16)[name = tensor<string, []>("transpose_170")];
tensor<fp16, [1, 1, 768]> input_143_cast_fp16 = add(x = input_135_cast_fp16, y = x_69_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
tensor<int32, []> var_1284 = const()[name = tensor<string, []>("op_1284"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_71_axes_0 = const()[name = tensor<string, []>("x_71_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_7_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107797504)))];
tensor<fp16, []> var_1287_to_fp16 = const()[name = tensor<string, []>("op_1287_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_71_cast_fp16 = layer_norm(axes = x_71_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_sa_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("x_71_cast_fp16")];
tensor<fp16, [2304, 768]> layers_7_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107799104)))];
tensor<fp16, [1, 1, 2304]> linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_qkv_proj_weight_to_fp16, x = x_71_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
tensor<int32, [5]> var_1306 = const()[name = tensor<string, []>("op_1306"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_31_cast_fp16 = reshape(shape = var_1306, x = linear_35_cast_fp16)[name = tensor<string, []>("qkv_31_cast_fp16")];
tensor<int32, [5]> q_29_begin_0 = const()[name = tensor<string, []>("q_29_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_29_end_0 = const()[name = tensor<string, []>("q_29_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_29_end_mask_0 = const()[name = tensor<string, []>("q_29_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_29_squeeze_mask_0 = const()[name = tensor<string, []>("q_29_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_29_cast_fp16 = slice_by_index(begin = q_29_begin_0, end = q_29_end_0, end_mask = q_29_end_mask_0, squeeze_mask = q_29_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("q_29_cast_fp16")];
tensor<int32, [5]> k_29_begin_0 = const()[name = tensor<string, []>("k_29_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_29_end_0 = const()[name = tensor<string, []>("k_29_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_29_end_mask_0 = const()[name = tensor<string, []>("k_29_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_29_squeeze_mask_0 = const()[name = tensor<string, []>("k_29_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_29_cast_fp16 = slice_by_index(begin = k_29_begin_0, end = k_29_end_0, end_mask = k_29_end_mask_0, squeeze_mask = k_29_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("k_29_cast_fp16")];
tensor<int32, [5]> v_29_begin_0 = const()[name = tensor<string, []>("v_29_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_29_end_0 = const()[name = tensor<string, []>("v_29_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_29_end_mask_0 = const()[name = tensor<string, []>("v_29_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_29_squeeze_mask_0 = const()[name = tensor<string, []>("v_29_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_29_cast_fp16 = slice_by_index(begin = v_29_begin_0, end = v_29_end_0, end_mask = v_29_end_mask_0, squeeze_mask = v_29_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("v_29_cast_fp16")];
tensor<bool, [512]> var_1318_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position7)[name = tensor<string, []>("op_1318_cast_fp16")];
tensor<int32, [4]> var_1320 = const()[name = tensor<string, []>("op_1320"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_1319_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1319_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1318_cast_fp16_to_fp16 = cast(dtype = var_1319_to_fp16_dtype_0, x = var_1318_cast_fp16)[name = tensor<string, []>("cast_171")];
tensor<fp16, [1, 512, 1, 1]> mask_15_cast_fp16 = reshape(shape = var_1320, x = var_1318_cast_fp16_to_fp16)[name = tensor<string, []>("mask_15_cast_fp16")];
tensor<int32, [4]> k_new_15_reps_0 = const()[name = tensor<string, []>("k_new_15_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_15_cast_fp16 = tile(reps = k_new_15_reps_0, x = k_29_cast_fp16)[name = tensor<string, []>("k_new_15_cast_fp16")];
tensor<int32, [4]> v_new_15_reps_0 = const()[name = tensor<string, []>("v_new_15_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_15_cast_fp16 = tile(reps = v_new_15_reps_0, x = v_29_cast_fp16)[name = tensor<string, []>("v_new_15_cast_fp16")];
tensor<fp16, []> var_1282_to_fp16 = const()[name = tensor<string, []>("op_1282_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_1326_cast_fp16 = sub(x = var_1282_to_fp16, y = mask_15_cast_fp16)[name = tensor<string, []>("op_1326_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1327_cast_fp16 = mul(x = cache_k7, y = var_1326_cast_fp16)[name = tensor<string, []>("op_1327_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1328_cast_fp16 = mul(x = k_new_15_cast_fp16, y = mask_15_cast_fp16)[name = tensor<string, []>("op_1328_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_15 = add(x = var_1327_cast_fp16, y = var_1328_cast_fp16)[name = tensor<string, []>("new_k_15_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1331_cast_fp16 = mul(x = cache_v7, y = var_1326_cast_fp16)[name = tensor<string, []>("op_1331_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1332_cast_fp16 = mul(x = v_new_15_cast_fp16, y = mask_15_cast_fp16)[name = tensor<string, []>("op_1332_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_15 = add(x = var_1331_cast_fp16, y = var_1332_cast_fp16)[name = tensor<string, []>("new_v_15_cast_fp16")];
tensor<bool, [512]> var_1334_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position7)[name = tensor<string, []>("op_1334_cast_fp16")];
tensor<int32, [4]> var_1336 = const()[name = tensor<string, []>("op_1336"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_1335_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1335_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1334_cast_fp16_to_fp16 = cast(dtype = var_1335_to_fp16_dtype_0, x = var_1334_cast_fp16)[name = tensor<string, []>("cast_170")];
tensor<fp16, [1, 1, 1, 512]> var_1337_cast_fp16 = reshape(shape = var_1336, x = var_1334_cast_fp16_to_fp16)[name = tensor<string, []>("op_1337_cast_fp16")];
tensor<int32, [4]> var_1341 = const()[name = tensor<string, []>("op_1341"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1344_transpose_x_0 = const()[name = tensor<string, []>("op_1344_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1344_transpose_y_0 = const()[name = tensor<string, []>("op_1344_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_100_perm_0 = const()[name = tensor<string, []>("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_101_perm_0 = const()[name = tensor<string, []>("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_101 = transpose(perm = transpose_101_perm_0, x = new_k_15)[name = tensor<string, []>("transpose_167")];
tensor<fp16, [1, 12, 1, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = q_29_cast_fp16)[name = tensor<string, []>("transpose_168")];
tensor<fp16, [1, 12, 1, 512]> var_1344_cast_fp16 = matmul(transpose_x = var_1344_transpose_x_0, transpose_y = var_1344_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor<string, []>("op_1344_cast_fp16")];
tensor<fp16, []> var_1345_to_fp16 = const()[name = tensor<string, []>("op_1345_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_57_cast_fp16 = mul(x = var_1344_cast_fp16, y = var_1345_to_fp16)[name = tensor<string, []>("attn_57_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_1347_cast_fp16 = sub(x = var_1282_to_fp16, y = var_1337_cast_fp16)[name = tensor<string, []>("op_1347_cast_fp16")];
tensor<fp16, []> var_1348_to_fp16 = const()[name = tensor<string, []>("op_1348_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_1349_cast_fp16 = mul(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor<string, []>("op_1349_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_145_cast_fp16 = add(x = attn_57_cast_fp16, y = var_1349_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_59_cast_fp16 = softmax(axis = var_1284, x = input_145_cast_fp16)[name = tensor<string, []>("attn_59_cast_fp16")];
tensor<bool, []> out_29_transpose_x_0 = const()[name = tensor<string, []>("out_29_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_29_transpose_y_0 = const()[name = tensor<string, []>("out_29_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_15_cast_fp16 = transpose(perm = var_1341, x = new_v_15)[name = tensor<string, []>("transpose_169")];
tensor<fp16, [1, 12, 1, 64]> out_29_cast_fp16 = matmul(transpose_x = out_29_transpose_x_0, transpose_y = out_29_transpose_y_0, x = attn_59_cast_fp16, y = v4_15_cast_fp16)[name = tensor<string, []>("out_29_cast_fp16")];
tensor<int32, [4]> var_1353_perm_0 = const()[name = tensor<string, []>("op_1353_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1354 = const()[name = tensor<string, []>("op_1354"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1353_cast_fp16 = transpose(perm = var_1353_perm_0, x = out_29_cast_fp16)[name = tensor<string, []>("transpose_166")];
tensor<fp16, [1, 1, 768]> input_147_cast_fp16 = reshape(shape = var_1354, x = var_1353_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
tensor<fp16, [768, 768]> layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111338112)))];
tensor<fp16, [1, 1, 768]> linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
tensor<fp16, []> var_1358_to_fp16 = const()[name = tensor<string, []>("op_1358_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_1359 = add(x = position7, y = var_1358_to_fp16)[name = tensor<string, []>("op_1359_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_36_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
tensor<int32, [1]> x_73_axes_0 = const()[name = tensor<string, []>("x_73_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_7_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112517824)))];
tensor<fp16, [1, 1, 768]> x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_xa_query_weight_to_fp16, x = input_149_cast_fp16)[name = tensor<string, []>("x_73_cast_fp16")];
tensor<int32, [1]> memory_15_axes_0 = const()[name = tensor<string, []>("memory_15_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_7_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112519424)))];
tensor<fp16, [1, 256, 768]> memory_15_cast_fp16 = layer_norm(axes = memory_15_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_15_cast_fp16")];
tensor<fp16, [128, 768]> layers_7_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112521024)))];
tensor<fp16, [1, 1, 128]> linear_37_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_7_cross_attn_q_proj_weight_to_fp16, x = x_73_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
tensor<int32, [4]> var_1380 = const()[name = tensor<string, []>("op_1380"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_1381_cast_fp16 = reshape(shape = var_1380, x = linear_37_cast_fp16)[name = tensor<string, []>("op_1381_cast_fp16")];
tensor<fp16, [256, 768]> layers_7_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112717696)))];
tensor<fp16, [1, 256, 256]> linear_38_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_7_cross_attn_kv_proj_weight_to_fp16, x = memory_15_cast_fp16)[name = tensor<string, []>("linear_38_cast_fp16")];
tensor<int32, [5]> var_1385 = const()[name = tensor<string, []>("op_1385"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_15_cast_fp16 = reshape(shape = var_1385, x = linear_38_cast_fp16)[name = tensor<string, []>("kv_15_cast_fp16")];
tensor<int32, [5]> var_1389_begin_0 = const()[name = tensor<string, []>("op_1389_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_1389_end_0 = const()[name = tensor<string, []>("op_1389_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_1389_end_mask_0 = const()[name = tensor<string, []>("op_1389_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1389_squeeze_mask_0 = const()[name = tensor<string, []>("op_1389_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1389_cast_fp16 = slice_by_index(begin = var_1389_begin_0, end = var_1389_end_0, end_mask = var_1389_end_mask_0, squeeze_mask = var_1389_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor<string, []>("op_1389_cast_fp16")];
tensor<int32, [5]> var_1393_begin_0 = const()[name = tensor<string, []>("op_1393_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_1393_end_0 = const()[name = tensor<string, []>("op_1393_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_1393_end_mask_0 = const()[name = tensor<string, []>("op_1393_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1393_squeeze_mask_0 = const()[name = tensor<string, []>("op_1393_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1393_cast_fp16 = slice_by_index(begin = var_1393_begin_0, end = var_1393_end_0, end_mask = var_1393_end_mask_0, squeeze_mask = var_1393_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor<string, []>("op_1393_cast_fp16")];
tensor<int32, [4]> v_31_perm_0 = const()[name = tensor<string, []>("v_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1396_transpose_x_0 = const()[name = tensor<string, []>("op_1396_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1396_transpose_y_0 = const()[name = tensor<string, []>("op_1396_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_102_perm_0 = const()[name = tensor<string, []>("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_103_perm_0 = const()[name = tensor<string, []>("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_103 = transpose(perm = transpose_103_perm_0, x = var_1389_cast_fp16)[name = tensor<string, []>("transpose_163")];
tensor<fp16, [1, 1, 1, 128]> transpose_102 = transpose(perm = transpose_102_perm_0, x = var_1381_cast_fp16)[name = tensor<string, []>("transpose_164")];
tensor<fp16, [1, 1, 1, 256]> var_1396_cast_fp16 = matmul(transpose_x = var_1396_transpose_x_0, transpose_y = var_1396_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor<string, []>("op_1396_cast_fp16")];
tensor<fp16, []> var_1397_to_fp16 = const()[name = tensor<string, []>("op_1397_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_61_cast_fp16 = mul(x = var_1396_cast_fp16, y = var_1397_to_fp16)[name = tensor<string, []>("attn_61_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_151_cast_fp16 = add(x = attn_61_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_63_cast_fp16 = softmax(axis = var_1284, x = input_151_cast_fp16)[name = tensor<string, []>("attn_63_cast_fp16")];
tensor<bool, []> out_31_transpose_x_0 = const()[name = tensor<string, []>("out_31_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_31_transpose_y_0 = const()[name = tensor<string, []>("out_31_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_31_cast_fp16 = transpose(perm = v_31_perm_0, x = var_1393_cast_fp16)[name = tensor<string, []>("transpose_165")];
tensor<fp16, [1, 1, 1, 128]> out_31_cast_fp16 = matmul(transpose_x = out_31_transpose_x_0, transpose_y = out_31_transpose_y_0, x = attn_63_cast_fp16, y = v_31_cast_fp16)[name = tensor<string, []>("out_31_cast_fp16")];
tensor<int32, [4]> var_1408_perm_0 = const()[name = tensor<string, []>("op_1408_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1409 = const()[name = tensor<string, []>("op_1409"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1408_cast_fp16 = transpose(perm = var_1408_perm_0, x = out_31_cast_fp16)[name = tensor<string, []>("transpose_162")];
tensor<fp16, [1, 1, 128]> input_153_cast_fp16 = reshape(shape = var_1409, x = var_1408_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
tensor<fp16, [768, 128]> layers_7_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113110976)))];
tensor<fp16, [1, 1, 768]> linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_cross_attn_o_proj_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_39_cast_fp16)[name = tensor<string, []>("input_155_cast_fp16")];
tensor<int32, [1]> x_75_axes_0 = const()[name = tensor<string, []>("x_75_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_7_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113307648)))];
tensor<fp16, [1, 1, 768]> x_75_cast_fp16 = layer_norm(axes = x_75_axes_0, epsilon = var_1287_to_fp16, gamma = layers_7_norm_ff_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("x_75_cast_fp16")];
tensor<int32, [3]> input_157_perm_0 = const()[name = tensor<string, []>("input_157_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_159_pad_type_0 = const()[name = tensor<string, []>("input_159_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_159_strides_0 = const()[name = tensor<string, []>("input_159_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_159_pad_0 = const()[name = tensor<string, []>("input_159_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_159_dilations_0 = const()[name = tensor<string, []>("input_159_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_159_groups_0 = const()[name = tensor<string, []>("input_159_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_7_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113309248)))];
tensor<fp16, [1, 768, 1]> input_157_cast_fp16 = transpose(perm = input_157_perm_0, x = x_75_cast_fp16)[name = tensor<string, []>("transpose_161")];
tensor<fp16, [1, 3072, 1]> input_159_cast_fp16 = conv(dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = layers_7_ffn_conv1_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
tensor<string, []> input_161_mode_0 = const()[name = tensor<string, []>("input_161_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
tensor<string, []> x_77_pad_type_0 = const()[name = tensor<string, []>("x_77_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_77_strides_0 = const()[name = tensor<string, []>("x_77_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_77_pad_0 = const()[name = tensor<string, []>("x_77_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_77_dilations_0 = const()[name = tensor<string, []>("x_77_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_77_groups_0 = const()[name = tensor<string, []>("x_77_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_7_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_7_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118027904)))];
tensor<fp16, [1, 768, 1]> x_77_cast_fp16 = conv(dilations = x_77_dilations_0, groups = x_77_groups_0, pad = x_77_pad_0, pad_type = x_77_pad_type_0, strides = x_77_strides_0, weight = layers_7_ffn_conv2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("x_77_cast_fp16")];
tensor<int32, [3]> x_79_perm_0 = const()[name = tensor<string, []>("x_79_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_79_cast_fp16 = transpose(perm = x_79_perm_0, x = x_77_cast_fp16)[name = tensor<string, []>("transpose_160")];
tensor<fp16, [1, 1, 768]> input_163_cast_fp16 = add(x = input_155_cast_fp16, y = x_79_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
tensor<int32, []> var_1454 = const()[name = tensor<string, []>("op_1454"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_81_axes_0 = const()[name = tensor<string, []>("x_81_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_8_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122746560)))];
tensor<fp16, []> var_1457_to_fp16 = const()[name = tensor<string, []>("op_1457_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_sa_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("x_81_cast_fp16")];
tensor<fp16, [2304, 768]> layers_8_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122748160)))];
tensor<fp16, [1, 1, 2304]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_qkv_proj_weight_to_fp16, x = x_81_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")];
tensor<int32, [5]> var_1476 = const()[name = tensor<string, []>("op_1476"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_35_cast_fp16 = reshape(shape = var_1476, x = linear_40_cast_fp16)[name = tensor<string, []>("qkv_35_cast_fp16")];
tensor<int32, [5]> q_33_begin_0 = const()[name = tensor<string, []>("q_33_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_33_end_0 = const()[name = tensor<string, []>("q_33_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_33_end_mask_0 = const()[name = tensor<string, []>("q_33_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_33_squeeze_mask_0 = const()[name = tensor<string, []>("q_33_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_33_cast_fp16 = slice_by_index(begin = q_33_begin_0, end = q_33_end_0, end_mask = q_33_end_mask_0, squeeze_mask = q_33_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("q_33_cast_fp16")];
tensor<int32, [5]> k_33_begin_0 = const()[name = tensor<string, []>("k_33_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_33_end_0 = const()[name = tensor<string, []>("k_33_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_33_end_mask_0 = const()[name = tensor<string, []>("k_33_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_33_squeeze_mask_0 = const()[name = tensor<string, []>("k_33_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_33_cast_fp16 = slice_by_index(begin = k_33_begin_0, end = k_33_end_0, end_mask = k_33_end_mask_0, squeeze_mask = k_33_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("k_33_cast_fp16")];
tensor<int32, [5]> v_33_begin_0 = const()[name = tensor<string, []>("v_33_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_33_end_0 = const()[name = tensor<string, []>("v_33_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_33_end_mask_0 = const()[name = tensor<string, []>("v_33_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_33_squeeze_mask_0 = const()[name = tensor<string, []>("v_33_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_33_cast_fp16 = slice_by_index(begin = v_33_begin_0, end = v_33_end_0, end_mask = v_33_end_mask_0, squeeze_mask = v_33_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("v_33_cast_fp16")];
tensor<bool, [512]> var_1488_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position8)[name = tensor<string, []>("op_1488_cast_fp16")];
tensor<int32, [4]> var_1490 = const()[name = tensor<string, []>("op_1490"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_1489_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1489_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1488_cast_fp16_to_fp16 = cast(dtype = var_1489_to_fp16_dtype_0, x = var_1488_cast_fp16)[name = tensor<string, []>("cast_169")];
tensor<fp16, [1, 512, 1, 1]> mask_17_cast_fp16 = reshape(shape = var_1490, x = var_1488_cast_fp16_to_fp16)[name = tensor<string, []>("mask_17_cast_fp16")];
tensor<int32, [4]> k_new_17_reps_0 = const()[name = tensor<string, []>("k_new_17_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_17_cast_fp16 = tile(reps = k_new_17_reps_0, x = k_33_cast_fp16)[name = tensor<string, []>("k_new_17_cast_fp16")];
tensor<int32, [4]> v_new_17_reps_0 = const()[name = tensor<string, []>("v_new_17_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_17_cast_fp16 = tile(reps = v_new_17_reps_0, x = v_33_cast_fp16)[name = tensor<string, []>("v_new_17_cast_fp16")];
tensor<fp16, []> var_1452_to_fp16 = const()[name = tensor<string, []>("op_1452_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_1496_cast_fp16 = sub(x = var_1452_to_fp16, y = mask_17_cast_fp16)[name = tensor<string, []>("op_1496_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1497_cast_fp16 = mul(x = cache_k8, y = var_1496_cast_fp16)[name = tensor<string, []>("op_1497_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1498_cast_fp16 = mul(x = k_new_17_cast_fp16, y = mask_17_cast_fp16)[name = tensor<string, []>("op_1498_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_17 = add(x = var_1497_cast_fp16, y = var_1498_cast_fp16)[name = tensor<string, []>("new_k_17_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1501_cast_fp16 = mul(x = cache_v8, y = var_1496_cast_fp16)[name = tensor<string, []>("op_1501_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1502_cast_fp16 = mul(x = v_new_17_cast_fp16, y = mask_17_cast_fp16)[name = tensor<string, []>("op_1502_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_17 = add(x = var_1501_cast_fp16, y = var_1502_cast_fp16)[name = tensor<string, []>("new_v_17_cast_fp16")];
tensor<bool, [512]> var_1504_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position8)[name = tensor<string, []>("op_1504_cast_fp16")];
tensor<int32, [4]> var_1506 = const()[name = tensor<string, []>("op_1506"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_1505_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1505_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1504_cast_fp16_to_fp16 = cast(dtype = var_1505_to_fp16_dtype_0, x = var_1504_cast_fp16)[name = tensor<string, []>("cast_168")];
tensor<fp16, [1, 1, 1, 512]> var_1507_cast_fp16 = reshape(shape = var_1506, x = var_1504_cast_fp16_to_fp16)[name = tensor<string, []>("op_1507_cast_fp16")];
tensor<int32, [4]> var_1511 = const()[name = tensor<string, []>("op_1511"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1514_transpose_x_0 = const()[name = tensor<string, []>("op_1514_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1514_transpose_y_0 = const()[name = tensor<string, []>("op_1514_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_104_perm_0 = const()[name = tensor<string, []>("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_105_perm_0 = const()[name = tensor<string, []>("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_105 = transpose(perm = transpose_105_perm_0, x = new_k_17)[name = tensor<string, []>("transpose_157")];
tensor<fp16, [1, 12, 1, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = q_33_cast_fp16)[name = tensor<string, []>("transpose_158")];
tensor<fp16, [1, 12, 1, 512]> var_1514_cast_fp16 = matmul(transpose_x = var_1514_transpose_x_0, transpose_y = var_1514_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor<string, []>("op_1514_cast_fp16")];
tensor<fp16, []> var_1515_to_fp16 = const()[name = tensor<string, []>("op_1515_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_65_cast_fp16 = mul(x = var_1514_cast_fp16, y = var_1515_to_fp16)[name = tensor<string, []>("attn_65_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_1517_cast_fp16 = sub(x = var_1452_to_fp16, y = var_1507_cast_fp16)[name = tensor<string, []>("op_1517_cast_fp16")];
tensor<fp16, []> var_1518_to_fp16 = const()[name = tensor<string, []>("op_1518_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_1519_cast_fp16 = mul(x = var_1517_cast_fp16, y = var_1518_to_fp16)[name = tensor<string, []>("op_1519_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_165_cast_fp16 = add(x = attn_65_cast_fp16, y = var_1519_cast_fp16)[name = tensor<string, []>("input_165_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_67_cast_fp16 = softmax(axis = var_1454, x = input_165_cast_fp16)[name = tensor<string, []>("attn_67_cast_fp16")];
tensor<bool, []> out_33_transpose_x_0 = const()[name = tensor<string, []>("out_33_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_33_transpose_y_0 = const()[name = tensor<string, []>("out_33_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_17_cast_fp16 = transpose(perm = var_1511, x = new_v_17)[name = tensor<string, []>("transpose_159")];
tensor<fp16, [1, 12, 1, 64]> out_33_cast_fp16 = matmul(transpose_x = out_33_transpose_x_0, transpose_y = out_33_transpose_y_0, x = attn_67_cast_fp16, y = v4_17_cast_fp16)[name = tensor<string, []>("out_33_cast_fp16")];
tensor<int32, [4]> var_1523_perm_0 = const()[name = tensor<string, []>("op_1523_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1524 = const()[name = tensor<string, []>("op_1524"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1523_cast_fp16 = transpose(perm = var_1523_perm_0, x = out_33_cast_fp16)[name = tensor<string, []>("transpose_156")];
tensor<fp16, [1, 1, 768]> input_167_cast_fp16 = reshape(shape = var_1524, x = var_1523_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
tensor<fp16, [768, 768]> layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126287168)))];
tensor<fp16, [1, 1, 768]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")];
tensor<fp16, []> var_1528_to_fp16 = const()[name = tensor<string, []>("op_1528_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_1529 = add(x = position8, y = var_1528_to_fp16)[name = tensor<string, []>("op_1529_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_41_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
tensor<int32, [1]> x_83_axes_0 = const()[name = tensor<string, []>("x_83_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_8_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127466880)))];
tensor<fp16, [1, 1, 768]> x_83_cast_fp16 = layer_norm(axes = x_83_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_xa_query_weight_to_fp16, x = input_169_cast_fp16)[name = tensor<string, []>("x_83_cast_fp16")];
tensor<int32, [1]> memory_17_axes_0 = const()[name = tensor<string, []>("memory_17_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_8_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127468480)))];
tensor<fp16, [1, 256, 768]> memory_17_cast_fp16 = layer_norm(axes = memory_17_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_17_cast_fp16")];
tensor<fp16, [128, 768]> layers_8_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127470080)))];
tensor<fp16, [1, 1, 128]> linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_8_cross_attn_q_proj_weight_to_fp16, x = x_83_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")];
tensor<int32, [4]> var_1550 = const()[name = tensor<string, []>("op_1550"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_1551_cast_fp16 = reshape(shape = var_1550, x = linear_42_cast_fp16)[name = tensor<string, []>("op_1551_cast_fp16")];
tensor<fp16, [256, 768]> layers_8_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127666752)))];
tensor<fp16, [1, 256, 256]> linear_43_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_8_cross_attn_kv_proj_weight_to_fp16, x = memory_17_cast_fp16)[name = tensor<string, []>("linear_43_cast_fp16")];
tensor<int32, [5]> var_1555 = const()[name = tensor<string, []>("op_1555"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_17_cast_fp16 = reshape(shape = var_1555, x = linear_43_cast_fp16)[name = tensor<string, []>("kv_17_cast_fp16")];
tensor<int32, [5]> var_1559_begin_0 = const()[name = tensor<string, []>("op_1559_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_1559_end_0 = const()[name = tensor<string, []>("op_1559_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_1559_end_mask_0 = const()[name = tensor<string, []>("op_1559_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1559_squeeze_mask_0 = const()[name = tensor<string, []>("op_1559_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = var_1559_end_0, end_mask = var_1559_end_mask_0, squeeze_mask = var_1559_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor<string, []>("op_1559_cast_fp16")];
tensor<int32, [5]> var_1563_begin_0 = const()[name = tensor<string, []>("op_1563_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_1563_end_0 = const()[name = tensor<string, []>("op_1563_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_1563_end_mask_0 = const()[name = tensor<string, []>("op_1563_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1563_squeeze_mask_0 = const()[name = tensor<string, []>("op_1563_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1563_cast_fp16 = slice_by_index(begin = var_1563_begin_0, end = var_1563_end_0, end_mask = var_1563_end_mask_0, squeeze_mask = var_1563_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor<string, []>("op_1563_cast_fp16")];
tensor<int32, [4]> v_35_perm_0 = const()[name = tensor<string, []>("v_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1566_transpose_x_0 = const()[name = tensor<string, []>("op_1566_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1566_transpose_y_0 = const()[name = tensor<string, []>("op_1566_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_106_perm_0 = const()[name = tensor<string, []>("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_107_perm_0 = const()[name = tensor<string, []>("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_107 = transpose(perm = transpose_107_perm_0, x = var_1559_cast_fp16)[name = tensor<string, []>("transpose_153")];
tensor<fp16, [1, 1, 1, 128]> transpose_106 = transpose(perm = transpose_106_perm_0, x = var_1551_cast_fp16)[name = tensor<string, []>("transpose_154")];
tensor<fp16, [1, 1, 1, 256]> var_1566_cast_fp16 = matmul(transpose_x = var_1566_transpose_x_0, transpose_y = var_1566_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor<string, []>("op_1566_cast_fp16")];
tensor<fp16, []> var_1567_to_fp16 = const()[name = tensor<string, []>("op_1567_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_69_cast_fp16 = mul(x = var_1566_cast_fp16, y = var_1567_to_fp16)[name = tensor<string, []>("attn_69_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_171_cast_fp16 = add(x = attn_69_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_71_cast_fp16 = softmax(axis = var_1454, x = input_171_cast_fp16)[name = tensor<string, []>("attn_71_cast_fp16")];
tensor<bool, []> out_35_transpose_x_0 = const()[name = tensor<string, []>("out_35_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_35_transpose_y_0 = const()[name = tensor<string, []>("out_35_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_35_cast_fp16 = transpose(perm = v_35_perm_0, x = var_1563_cast_fp16)[name = tensor<string, []>("transpose_155")];
tensor<fp16, [1, 1, 1, 128]> out_35_cast_fp16 = matmul(transpose_x = out_35_transpose_x_0, transpose_y = out_35_transpose_y_0, x = attn_71_cast_fp16, y = v_35_cast_fp16)[name = tensor<string, []>("out_35_cast_fp16")];
tensor<int32, [4]> var_1578_perm_0 = const()[name = tensor<string, []>("op_1578_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1579 = const()[name = tensor<string, []>("op_1579"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1578_cast_fp16 = transpose(perm = var_1578_perm_0, x = out_35_cast_fp16)[name = tensor<string, []>("transpose_152")];
tensor<fp16, [1, 1, 128]> input_173_cast_fp16 = reshape(shape = var_1579, x = var_1578_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
tensor<fp16, [768, 128]> layers_8_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(128060032)))];
tensor<fp16, [1, 1, 768]> linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_cross_attn_o_proj_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_175_cast_fp16 = add(x = input_169_cast_fp16, y = linear_44_cast_fp16)[name = tensor<string, []>("input_175_cast_fp16")];
tensor<int32, [1]> x_85_axes_0 = const()[name = tensor<string, []>("x_85_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_8_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(128256704)))];
tensor<fp16, [1, 1, 768]> x_85_cast_fp16 = layer_norm(axes = x_85_axes_0, epsilon = var_1457_to_fp16, gamma = layers_8_norm_ff_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("x_85_cast_fp16")];
tensor<int32, [3]> input_177_perm_0 = const()[name = tensor<string, []>("input_177_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_179_pad_type_0 = const()[name = tensor<string, []>("input_179_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_179_strides_0 = const()[name = tensor<string, []>("input_179_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_179_pad_0 = const()[name = tensor<string, []>("input_179_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_179_dilations_0 = const()[name = tensor<string, []>("input_179_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_179_groups_0 = const()[name = tensor<string, []>("input_179_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_8_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(128258304)))];
tensor<fp16, [1, 768, 1]> input_177_cast_fp16 = transpose(perm = input_177_perm_0, x = x_85_cast_fp16)[name = tensor<string, []>("transpose_151")];
tensor<fp16, [1, 3072, 1]> input_179_cast_fp16 = conv(dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = layers_8_ffn_conv1_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
tensor<string, []> x_87_pad_type_0 = const()[name = tensor<string, []>("x_87_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_87_strides_0 = const()[name = tensor<string, []>("x_87_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_87_pad_0 = const()[name = tensor<string, []>("x_87_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_87_dilations_0 = const()[name = tensor<string, []>("x_87_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_87_groups_0 = const()[name = tensor<string, []>("x_87_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_8_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_8_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132976960)))];
tensor<fp16, [1, 768, 1]> x_87_cast_fp16 = conv(dilations = x_87_dilations_0, groups = x_87_groups_0, pad = x_87_pad_0, pad_type = x_87_pad_type_0, strides = x_87_strides_0, weight = layers_8_ffn_conv2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("x_87_cast_fp16")];
tensor<int32, [3]> x_89_perm_0 = const()[name = tensor<string, []>("x_89_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_89_cast_fp16 = transpose(perm = x_89_perm_0, x = x_87_cast_fp16)[name = tensor<string, []>("transpose_150")];
tensor<fp16, [1, 1, 768]> input_183_cast_fp16 = add(x = input_175_cast_fp16, y = x_89_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
tensor<int32, []> var_1624 = const()[name = tensor<string, []>("op_1624"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_91_axes_0 = const()[name = tensor<string, []>("x_91_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_9_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137695616)))];
tensor<fp16, []> var_1627_to_fp16 = const()[name = tensor<string, []>("op_1627_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_91_cast_fp16 = layer_norm(axes = x_91_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_sa_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("x_91_cast_fp16")];
tensor<fp16, [2304, 768]> layers_9_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137697216)))];
tensor<fp16, [1, 1, 2304]> linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_qkv_proj_weight_to_fp16, x = x_91_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")];
tensor<int32, [5]> var_1646 = const()[name = tensor<string, []>("op_1646"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_39_cast_fp16 = reshape(shape = var_1646, x = linear_45_cast_fp16)[name = tensor<string, []>("qkv_39_cast_fp16")];
tensor<int32, [5]> q_37_begin_0 = const()[name = tensor<string, []>("q_37_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_37_end_0 = const()[name = tensor<string, []>("q_37_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_37_end_mask_0 = const()[name = tensor<string, []>("q_37_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_37_squeeze_mask_0 = const()[name = tensor<string, []>("q_37_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_37_cast_fp16 = slice_by_index(begin = q_37_begin_0, end = q_37_end_0, end_mask = q_37_end_mask_0, squeeze_mask = q_37_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("q_37_cast_fp16")];
tensor<int32, [5]> k_37_begin_0 = const()[name = tensor<string, []>("k_37_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_37_end_0 = const()[name = tensor<string, []>("k_37_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_37_end_mask_0 = const()[name = tensor<string, []>("k_37_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_37_squeeze_mask_0 = const()[name = tensor<string, []>("k_37_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_37_cast_fp16 = slice_by_index(begin = k_37_begin_0, end = k_37_end_0, end_mask = k_37_end_mask_0, squeeze_mask = k_37_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("k_37_cast_fp16")];
tensor<int32, [5]> v_37_begin_0 = const()[name = tensor<string, []>("v_37_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_37_end_0 = const()[name = tensor<string, []>("v_37_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_37_end_mask_0 = const()[name = tensor<string, []>("v_37_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_37_squeeze_mask_0 = const()[name = tensor<string, []>("v_37_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_37_cast_fp16 = slice_by_index(begin = v_37_begin_0, end = v_37_end_0, end_mask = v_37_end_mask_0, squeeze_mask = v_37_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("v_37_cast_fp16")];
tensor<bool, [512]> var_1658_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position9)[name = tensor<string, []>("op_1658_cast_fp16")];
tensor<int32, [4]> var_1660 = const()[name = tensor<string, []>("op_1660"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_1659_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1659_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1658_cast_fp16_to_fp16 = cast(dtype = var_1659_to_fp16_dtype_0, x = var_1658_cast_fp16)[name = tensor<string, []>("cast_167")];
tensor<fp16, [1, 512, 1, 1]> mask_19_cast_fp16 = reshape(shape = var_1660, x = var_1658_cast_fp16_to_fp16)[name = tensor<string, []>("mask_19_cast_fp16")];
tensor<int32, [4]> k_new_19_reps_0 = const()[name = tensor<string, []>("k_new_19_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_19_cast_fp16 = tile(reps = k_new_19_reps_0, x = k_37_cast_fp16)[name = tensor<string, []>("k_new_19_cast_fp16")];
tensor<int32, [4]> v_new_19_reps_0 = const()[name = tensor<string, []>("v_new_19_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_19_cast_fp16 = tile(reps = v_new_19_reps_0, x = v_37_cast_fp16)[name = tensor<string, []>("v_new_19_cast_fp16")];
tensor<fp16, []> var_1622_to_fp16 = const()[name = tensor<string, []>("op_1622_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_1666_cast_fp16 = sub(x = var_1622_to_fp16, y = mask_19_cast_fp16)[name = tensor<string, []>("op_1666_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1667_cast_fp16 = mul(x = cache_k9, y = var_1666_cast_fp16)[name = tensor<string, []>("op_1667_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1668_cast_fp16 = mul(x = k_new_19_cast_fp16, y = mask_19_cast_fp16)[name = tensor<string, []>("op_1668_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_19 = add(x = var_1667_cast_fp16, y = var_1668_cast_fp16)[name = tensor<string, []>("new_k_19_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1671_cast_fp16 = mul(x = cache_v9, y = var_1666_cast_fp16)[name = tensor<string, []>("op_1671_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1672_cast_fp16 = mul(x = v_new_19_cast_fp16, y = mask_19_cast_fp16)[name = tensor<string, []>("op_1672_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_19 = add(x = var_1671_cast_fp16, y = var_1672_cast_fp16)[name = tensor<string, []>("new_v_19_cast_fp16")];
tensor<bool, [512]> var_1674_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position9)[name = tensor<string, []>("op_1674_cast_fp16")];
tensor<int32, [4]> var_1676 = const()[name = tensor<string, []>("op_1676"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_1675_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1675_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1674_cast_fp16_to_fp16 = cast(dtype = var_1675_to_fp16_dtype_0, x = var_1674_cast_fp16)[name = tensor<string, []>("cast_166")];
tensor<fp16, [1, 1, 1, 512]> var_1677_cast_fp16 = reshape(shape = var_1676, x = var_1674_cast_fp16_to_fp16)[name = tensor<string, []>("op_1677_cast_fp16")];
tensor<int32, [4]> var_1681 = const()[name = tensor<string, []>("op_1681"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1684_transpose_x_0 = const()[name = tensor<string, []>("op_1684_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1684_transpose_y_0 = const()[name = tensor<string, []>("op_1684_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_108_perm_0 = const()[name = tensor<string, []>("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_109_perm_0 = const()[name = tensor<string, []>("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_109 = transpose(perm = transpose_109_perm_0, x = new_k_19)[name = tensor<string, []>("transpose_147")];
tensor<fp16, [1, 12, 1, 64]> transpose_108 = transpose(perm = transpose_108_perm_0, x = q_37_cast_fp16)[name = tensor<string, []>("transpose_148")];
tensor<fp16, [1, 12, 1, 512]> var_1684_cast_fp16 = matmul(transpose_x = var_1684_transpose_x_0, transpose_y = var_1684_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor<string, []>("op_1684_cast_fp16")];
tensor<fp16, []> var_1685_to_fp16 = const()[name = tensor<string, []>("op_1685_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_73_cast_fp16 = mul(x = var_1684_cast_fp16, y = var_1685_to_fp16)[name = tensor<string, []>("attn_73_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_1687_cast_fp16 = sub(x = var_1622_to_fp16, y = var_1677_cast_fp16)[name = tensor<string, []>("op_1687_cast_fp16")];
tensor<fp16, []> var_1688_to_fp16 = const()[name = tensor<string, []>("op_1688_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_1689_cast_fp16 = mul(x = var_1687_cast_fp16, y = var_1688_to_fp16)[name = tensor<string, []>("op_1689_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_185_cast_fp16 = add(x = attn_73_cast_fp16, y = var_1689_cast_fp16)[name = tensor<string, []>("input_185_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_75_cast_fp16 = softmax(axis = var_1624, x = input_185_cast_fp16)[name = tensor<string, []>("attn_75_cast_fp16")];
tensor<bool, []> out_37_transpose_x_0 = const()[name = tensor<string, []>("out_37_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_37_transpose_y_0 = const()[name = tensor<string, []>("out_37_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_19_cast_fp16 = transpose(perm = var_1681, x = new_v_19)[name = tensor<string, []>("transpose_149")];
tensor<fp16, [1, 12, 1, 64]> out_37_cast_fp16 = matmul(transpose_x = out_37_transpose_x_0, transpose_y = out_37_transpose_y_0, x = attn_75_cast_fp16, y = v4_19_cast_fp16)[name = tensor<string, []>("out_37_cast_fp16")];
tensor<int32, [4]> var_1693_perm_0 = const()[name = tensor<string, []>("op_1693_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1694 = const()[name = tensor<string, []>("op_1694"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1693_cast_fp16 = transpose(perm = var_1693_perm_0, x = out_37_cast_fp16)[name = tensor<string, []>("transpose_146")];
tensor<fp16, [1, 1, 768]> input_187_cast_fp16 = reshape(shape = var_1694, x = var_1693_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
tensor<fp16, [768, 768]> layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(141236224)))];
tensor<fp16, [1, 1, 768]> linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")];
tensor<fp16, []> var_1698_to_fp16 = const()[name = tensor<string, []>("op_1698_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_1699 = add(x = position9, y = var_1698_to_fp16)[name = tensor<string, []>("op_1699_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_189_cast_fp16 = add(x = input_183_cast_fp16, y = linear_46_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
tensor<int32, [1]> x_93_axes_0 = const()[name = tensor<string, []>("x_93_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_9_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142415936)))];
tensor<fp16, [1, 1, 768]> x_93_cast_fp16 = layer_norm(axes = x_93_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_xa_query_weight_to_fp16, x = input_189_cast_fp16)[name = tensor<string, []>("x_93_cast_fp16")];
tensor<int32, [1]> memory_19_axes_0 = const()[name = tensor<string, []>("memory_19_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_9_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142417536)))];
tensor<fp16, [1, 256, 768]> memory_19_cast_fp16 = layer_norm(axes = memory_19_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_19_cast_fp16")];
tensor<fp16, [128, 768]> layers_9_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142419136)))];
tensor<fp16, [1, 1, 128]> linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_9_cross_attn_q_proj_weight_to_fp16, x = x_93_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")];
tensor<int32, [4]> var_1720 = const()[name = tensor<string, []>("op_1720"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_1721_cast_fp16 = reshape(shape = var_1720, x = linear_47_cast_fp16)[name = tensor<string, []>("op_1721_cast_fp16")];
tensor<fp16, [256, 768]> layers_9_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142615808)))];
tensor<fp16, [1, 256, 256]> linear_48_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_9_cross_attn_kv_proj_weight_to_fp16, x = memory_19_cast_fp16)[name = tensor<string, []>("linear_48_cast_fp16")];
tensor<int32, [5]> var_1725 = const()[name = tensor<string, []>("op_1725"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_19_cast_fp16 = reshape(shape = var_1725, x = linear_48_cast_fp16)[name = tensor<string, []>("kv_19_cast_fp16")];
tensor<int32, [5]> var_1729_begin_0 = const()[name = tensor<string, []>("op_1729_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_1729_end_0 = const()[name = tensor<string, []>("op_1729_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_1729_end_mask_0 = const()[name = tensor<string, []>("op_1729_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1729_squeeze_mask_0 = const()[name = tensor<string, []>("op_1729_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1729_cast_fp16 = slice_by_index(begin = var_1729_begin_0, end = var_1729_end_0, end_mask = var_1729_end_mask_0, squeeze_mask = var_1729_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor<string, []>("op_1729_cast_fp16")];
tensor<int32, [5]> var_1733_begin_0 = const()[name = tensor<string, []>("op_1733_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_1733_end_0 = const()[name = tensor<string, []>("op_1733_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_1733_end_mask_0 = const()[name = tensor<string, []>("op_1733_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1733_squeeze_mask_0 = const()[name = tensor<string, []>("op_1733_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1733_cast_fp16 = slice_by_index(begin = var_1733_begin_0, end = var_1733_end_0, end_mask = var_1733_end_mask_0, squeeze_mask = var_1733_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor<string, []>("op_1733_cast_fp16")];
tensor<int32, [4]> v_39_perm_0 = const()[name = tensor<string, []>("v_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1736_transpose_x_0 = const()[name = tensor<string, []>("op_1736_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1736_transpose_y_0 = const()[name = tensor<string, []>("op_1736_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_110_perm_0 = const()[name = tensor<string, []>("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_111_perm_0 = const()[name = tensor<string, []>("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_111 = transpose(perm = transpose_111_perm_0, x = var_1729_cast_fp16)[name = tensor<string, []>("transpose_143")];
tensor<fp16, [1, 1, 1, 128]> transpose_110 = transpose(perm = transpose_110_perm_0, x = var_1721_cast_fp16)[name = tensor<string, []>("transpose_144")];
tensor<fp16, [1, 1, 1, 256]> var_1736_cast_fp16 = matmul(transpose_x = var_1736_transpose_x_0, transpose_y = var_1736_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor<string, []>("op_1736_cast_fp16")];
tensor<fp16, []> var_1737_to_fp16 = const()[name = tensor<string, []>("op_1737_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_77_cast_fp16 = mul(x = var_1736_cast_fp16, y = var_1737_to_fp16)[name = tensor<string, []>("attn_77_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_191_cast_fp16 = add(x = attn_77_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_79_cast_fp16 = softmax(axis = var_1624, x = input_191_cast_fp16)[name = tensor<string, []>("attn_79_cast_fp16")];
tensor<bool, []> out_39_transpose_x_0 = const()[name = tensor<string, []>("out_39_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_39_transpose_y_0 = const()[name = tensor<string, []>("out_39_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_39_cast_fp16 = transpose(perm = v_39_perm_0, x = var_1733_cast_fp16)[name = tensor<string, []>("transpose_145")];
tensor<fp16, [1, 1, 1, 128]> out_39_cast_fp16 = matmul(transpose_x = out_39_transpose_x_0, transpose_y = out_39_transpose_y_0, x = attn_79_cast_fp16, y = v_39_cast_fp16)[name = tensor<string, []>("out_39_cast_fp16")];
tensor<int32, [4]> var_1748_perm_0 = const()[name = tensor<string, []>("op_1748_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1749 = const()[name = tensor<string, []>("op_1749"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1748_cast_fp16 = transpose(perm = var_1748_perm_0, x = out_39_cast_fp16)[name = tensor<string, []>("transpose_142")];
tensor<fp16, [1, 1, 128]> input_193_cast_fp16 = reshape(shape = var_1749, x = var_1748_cast_fp16)[name = tensor<string, []>("input_193_cast_fp16")];
tensor<fp16, [768, 128]> layers_9_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143009088)))];
tensor<fp16, [1, 1, 768]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_cross_attn_o_proj_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("linear_49_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_195_cast_fp16 = add(x = input_189_cast_fp16, y = linear_49_cast_fp16)[name = tensor<string, []>("input_195_cast_fp16")];
tensor<int32, [1]> x_95_axes_0 = const()[name = tensor<string, []>("x_95_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_9_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143205760)))];
tensor<fp16, [1, 1, 768]> x_95_cast_fp16 = layer_norm(axes = x_95_axes_0, epsilon = var_1627_to_fp16, gamma = layers_9_norm_ff_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("x_95_cast_fp16")];
tensor<int32, [3]> input_197_perm_0 = const()[name = tensor<string, []>("input_197_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_199_pad_type_0 = const()[name = tensor<string, []>("input_199_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_199_strides_0 = const()[name = tensor<string, []>("input_199_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_199_pad_0 = const()[name = tensor<string, []>("input_199_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_199_dilations_0 = const()[name = tensor<string, []>("input_199_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_199_groups_0 = const()[name = tensor<string, []>("input_199_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_9_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143207360)))];
tensor<fp16, [1, 768, 1]> input_197_cast_fp16 = transpose(perm = input_197_perm_0, x = x_95_cast_fp16)[name = tensor<string, []>("transpose_141")];
tensor<fp16, [1, 3072, 1]> input_199_cast_fp16 = conv(dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = layers_9_ffn_conv1_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
tensor<string, []> x_97_pad_type_0 = const()[name = tensor<string, []>("x_97_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_97_strides_0 = const()[name = tensor<string, []>("x_97_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_97_pad_0 = const()[name = tensor<string, []>("x_97_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_97_dilations_0 = const()[name = tensor<string, []>("x_97_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_97_groups_0 = const()[name = tensor<string, []>("x_97_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_9_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_9_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147926016)))];
tensor<fp16, [1, 768, 1]> x_97_cast_fp16 = conv(dilations = x_97_dilations_0, groups = x_97_groups_0, pad = x_97_pad_0, pad_type = x_97_pad_type_0, strides = x_97_strides_0, weight = layers_9_ffn_conv2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("x_97_cast_fp16")];
tensor<int32, [3]> x_99_perm_0 = const()[name = tensor<string, []>("x_99_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_99_cast_fp16 = transpose(perm = x_99_perm_0, x = x_97_cast_fp16)[name = tensor<string, []>("transpose_140")];
tensor<fp16, [1, 1, 768]> input_203_cast_fp16 = add(x = input_195_cast_fp16, y = x_99_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
tensor<int32, []> var_1794 = const()[name = tensor<string, []>("op_1794"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_101_axes_0 = const()[name = tensor<string, []>("x_101_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_10_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152644672)))];
tensor<fp16, []> var_1797_to_fp16 = const()[name = tensor<string, []>("op_1797_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_101_cast_fp16 = layer_norm(axes = x_101_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_sa_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("x_101_cast_fp16")];
tensor<fp16, [2304, 768]> layers_10_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152646272)))];
tensor<fp16, [1, 1, 2304]> linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_qkv_proj_weight_to_fp16, x = x_101_cast_fp16)[name = tensor<string, []>("linear_50_cast_fp16")];
tensor<int32, [5]> var_1816 = const()[name = tensor<string, []>("op_1816"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_43_cast_fp16 = reshape(shape = var_1816, x = linear_50_cast_fp16)[name = tensor<string, []>("qkv_43_cast_fp16")];
tensor<int32, [5]> q_41_begin_0 = const()[name = tensor<string, []>("q_41_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_41_end_0 = const()[name = tensor<string, []>("q_41_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_41_end_mask_0 = const()[name = tensor<string, []>("q_41_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_41_squeeze_mask_0 = const()[name = tensor<string, []>("q_41_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_41_cast_fp16 = slice_by_index(begin = q_41_begin_0, end = q_41_end_0, end_mask = q_41_end_mask_0, squeeze_mask = q_41_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("q_41_cast_fp16")];
tensor<int32, [5]> k_41_begin_0 = const()[name = tensor<string, []>("k_41_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_41_end_0 = const()[name = tensor<string, []>("k_41_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_41_end_mask_0 = const()[name = tensor<string, []>("k_41_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_41_squeeze_mask_0 = const()[name = tensor<string, []>("k_41_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_41_cast_fp16 = slice_by_index(begin = k_41_begin_0, end = k_41_end_0, end_mask = k_41_end_mask_0, squeeze_mask = k_41_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("k_41_cast_fp16")];
tensor<int32, [5]> v_41_begin_0 = const()[name = tensor<string, []>("v_41_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_41_end_0 = const()[name = tensor<string, []>("v_41_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_41_end_mask_0 = const()[name = tensor<string, []>("v_41_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_41_squeeze_mask_0 = const()[name = tensor<string, []>("v_41_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_41_cast_fp16 = slice_by_index(begin = v_41_begin_0, end = v_41_end_0, end_mask = v_41_end_mask_0, squeeze_mask = v_41_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("v_41_cast_fp16")];
tensor<bool, [512]> var_1828_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position10)[name = tensor<string, []>("op_1828_cast_fp16")];
tensor<int32, [4]> var_1830 = const()[name = tensor<string, []>("op_1830"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_1829_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1829_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1828_cast_fp16_to_fp16 = cast(dtype = var_1829_to_fp16_dtype_0, x = var_1828_cast_fp16)[name = tensor<string, []>("cast_165")];
tensor<fp16, [1, 512, 1, 1]> mask_21_cast_fp16 = reshape(shape = var_1830, x = var_1828_cast_fp16_to_fp16)[name = tensor<string, []>("mask_21_cast_fp16")];
tensor<int32, [4]> k_new_21_reps_0 = const()[name = tensor<string, []>("k_new_21_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_21_cast_fp16 = tile(reps = k_new_21_reps_0, x = k_41_cast_fp16)[name = tensor<string, []>("k_new_21_cast_fp16")];
tensor<int32, [4]> v_new_21_reps_0 = const()[name = tensor<string, []>("v_new_21_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_21_cast_fp16 = tile(reps = v_new_21_reps_0, x = v_41_cast_fp16)[name = tensor<string, []>("v_new_21_cast_fp16")];
tensor<fp16, []> var_1792_to_fp16 = const()[name = tensor<string, []>("op_1792_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_1836_cast_fp16 = sub(x = var_1792_to_fp16, y = mask_21_cast_fp16)[name = tensor<string, []>("op_1836_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1837_cast_fp16 = mul(x = cache_k10, y = var_1836_cast_fp16)[name = tensor<string, []>("op_1837_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1838_cast_fp16 = mul(x = k_new_21_cast_fp16, y = mask_21_cast_fp16)[name = tensor<string, []>("op_1838_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k_21 = add(x = var_1837_cast_fp16, y = var_1838_cast_fp16)[name = tensor<string, []>("new_k_21_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1841_cast_fp16 = mul(x = cache_v10, y = var_1836_cast_fp16)[name = tensor<string, []>("op_1841_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_1842_cast_fp16 = mul(x = v_new_21_cast_fp16, y = mask_21_cast_fp16)[name = tensor<string, []>("op_1842_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v_21 = add(x = var_1841_cast_fp16, y = var_1842_cast_fp16)[name = tensor<string, []>("new_v_21_cast_fp16")];
tensor<bool, [512]> var_1844_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position10)[name = tensor<string, []>("op_1844_cast_fp16")];
tensor<int32, [4]> var_1846 = const()[name = tensor<string, []>("op_1846"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_1845_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1845_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1844_cast_fp16_to_fp16 = cast(dtype = var_1845_to_fp16_dtype_0, x = var_1844_cast_fp16)[name = tensor<string, []>("cast_164")];
tensor<fp16, [1, 1, 1, 512]> var_1847_cast_fp16 = reshape(shape = var_1846, x = var_1844_cast_fp16_to_fp16)[name = tensor<string, []>("op_1847_cast_fp16")];
tensor<int32, [4]> var_1851 = const()[name = tensor<string, []>("op_1851"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1854_transpose_x_0 = const()[name = tensor<string, []>("op_1854_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1854_transpose_y_0 = const()[name = tensor<string, []>("op_1854_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_112_perm_0 = const()[name = tensor<string, []>("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_113_perm_0 = const()[name = tensor<string, []>("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_113 = transpose(perm = transpose_113_perm_0, x = new_k_21)[name = tensor<string, []>("transpose_137")];
tensor<fp16, [1, 12, 1, 64]> transpose_112 = transpose(perm = transpose_112_perm_0, x = q_41_cast_fp16)[name = tensor<string, []>("transpose_138")];
tensor<fp16, [1, 12, 1, 512]> var_1854_cast_fp16 = matmul(transpose_x = var_1854_transpose_x_0, transpose_y = var_1854_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor<string, []>("op_1854_cast_fp16")];
tensor<fp16, []> var_1855_to_fp16 = const()[name = tensor<string, []>("op_1855_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_81_cast_fp16 = mul(x = var_1854_cast_fp16, y = var_1855_to_fp16)[name = tensor<string, []>("attn_81_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_1857_cast_fp16 = sub(x = var_1792_to_fp16, y = var_1847_cast_fp16)[name = tensor<string, []>("op_1857_cast_fp16")];
tensor<fp16, []> var_1858_to_fp16 = const()[name = tensor<string, []>("op_1858_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_1859_cast_fp16 = mul(x = var_1857_cast_fp16, y = var_1858_to_fp16)[name = tensor<string, []>("op_1859_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_205_cast_fp16 = add(x = attn_81_cast_fp16, y = var_1859_cast_fp16)[name = tensor<string, []>("input_205_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_83_cast_fp16 = softmax(axis = var_1794, x = input_205_cast_fp16)[name = tensor<string, []>("attn_83_cast_fp16")];
tensor<bool, []> out_41_transpose_x_0 = const()[name = tensor<string, []>("out_41_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_41_transpose_y_0 = const()[name = tensor<string, []>("out_41_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_21_cast_fp16 = transpose(perm = var_1851, x = new_v_21)[name = tensor<string, []>("transpose_139")];
tensor<fp16, [1, 12, 1, 64]> out_41_cast_fp16 = matmul(transpose_x = out_41_transpose_x_0, transpose_y = out_41_transpose_y_0, x = attn_83_cast_fp16, y = v4_21_cast_fp16)[name = tensor<string, []>("out_41_cast_fp16")];
tensor<int32, [4]> var_1863_perm_0 = const()[name = tensor<string, []>("op_1863_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1864 = const()[name = tensor<string, []>("op_1864"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1863_cast_fp16 = transpose(perm = var_1863_perm_0, x = out_41_cast_fp16)[name = tensor<string, []>("transpose_136")];
tensor<fp16, [1, 1, 768]> input_207_cast_fp16 = reshape(shape = var_1864, x = var_1863_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
tensor<fp16, [768, 768]> layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(156185280)))];
tensor<fp16, [1, 1, 768]> linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_207_cast_fp16)[name = tensor<string, []>("linear_51_cast_fp16")];
tensor<fp16, []> var_1868_to_fp16 = const()[name = tensor<string, []>("op_1868_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_1869 = add(x = position10, y = var_1868_to_fp16)[name = tensor<string, []>("op_1869_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_209_cast_fp16 = add(x = input_203_cast_fp16, y = linear_51_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
tensor<int32, [1]> x_103_axes_0 = const()[name = tensor<string, []>("x_103_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_10_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157364992)))];
tensor<fp16, [1, 1, 768]> x_103_cast_fp16 = layer_norm(axes = x_103_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_xa_query_weight_to_fp16, x = input_209_cast_fp16)[name = tensor<string, []>("x_103_cast_fp16")];
tensor<int32, [1]> memory_21_axes_0 = const()[name = tensor<string, []>("memory_21_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_10_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157366592)))];
tensor<fp16, [1, 256, 768]> memory_21_cast_fp16 = layer_norm(axes = memory_21_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_21_cast_fp16")];
tensor<fp16, [128, 768]> layers_10_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157368192)))];
tensor<fp16, [1, 1, 128]> linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_10_cross_attn_q_proj_weight_to_fp16, x = x_103_cast_fp16)[name = tensor<string, []>("linear_52_cast_fp16")];
tensor<int32, [4]> var_1890 = const()[name = tensor<string, []>("op_1890"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_1891_cast_fp16 = reshape(shape = var_1890, x = linear_52_cast_fp16)[name = tensor<string, []>("op_1891_cast_fp16")];
tensor<fp16, [256, 768]> layers_10_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157564864)))];
tensor<fp16, [1, 256, 256]> linear_53_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_10_cross_attn_kv_proj_weight_to_fp16, x = memory_21_cast_fp16)[name = tensor<string, []>("linear_53_cast_fp16")];
tensor<int32, [5]> var_1895 = const()[name = tensor<string, []>("op_1895"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_21_cast_fp16 = reshape(shape = var_1895, x = linear_53_cast_fp16)[name = tensor<string, []>("kv_21_cast_fp16")];
tensor<int32, [5]> var_1899_begin_0 = const()[name = tensor<string, []>("op_1899_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_1899_end_0 = const()[name = tensor<string, []>("op_1899_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_1899_end_mask_0 = const()[name = tensor<string, []>("op_1899_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1899_squeeze_mask_0 = const()[name = tensor<string, []>("op_1899_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1899_cast_fp16 = slice_by_index(begin = var_1899_begin_0, end = var_1899_end_0, end_mask = var_1899_end_mask_0, squeeze_mask = var_1899_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor<string, []>("op_1899_cast_fp16")];
tensor<int32, [5]> var_1903_begin_0 = const()[name = tensor<string, []>("op_1903_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_1903_end_0 = const()[name = tensor<string, []>("op_1903_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_1903_end_mask_0 = const()[name = tensor<string, []>("op_1903_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_1903_squeeze_mask_0 = const()[name = tensor<string, []>("op_1903_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_1903_cast_fp16 = slice_by_index(begin = var_1903_begin_0, end = var_1903_end_0, end_mask = var_1903_end_mask_0, squeeze_mask = var_1903_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
tensor<int32, [4]> v_43_perm_0 = const()[name = tensor<string, []>("v_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_1906_transpose_x_0 = const()[name = tensor<string, []>("op_1906_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_1906_transpose_y_0 = const()[name = tensor<string, []>("op_1906_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_114_perm_0 = const()[name = tensor<string, []>("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_115_perm_0 = const()[name = tensor<string, []>("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_115 = transpose(perm = transpose_115_perm_0, x = var_1899_cast_fp16)[name = tensor<string, []>("transpose_133")];
tensor<fp16, [1, 1, 1, 128]> transpose_114 = transpose(perm = transpose_114_perm_0, x = var_1891_cast_fp16)[name = tensor<string, []>("transpose_134")];
tensor<fp16, [1, 1, 1, 256]> var_1906_cast_fp16 = matmul(transpose_x = var_1906_transpose_x_0, transpose_y = var_1906_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor<string, []>("op_1906_cast_fp16")];
tensor<fp16, []> var_1907_to_fp16 = const()[name = tensor<string, []>("op_1907_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_85_cast_fp16 = mul(x = var_1906_cast_fp16, y = var_1907_to_fp16)[name = tensor<string, []>("attn_85_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_211_cast_fp16 = add(x = attn_85_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_87_cast_fp16 = softmax(axis = var_1794, x = input_211_cast_fp16)[name = tensor<string, []>("attn_87_cast_fp16")];
tensor<bool, []> out_43_transpose_x_0 = const()[name = tensor<string, []>("out_43_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_43_transpose_y_0 = const()[name = tensor<string, []>("out_43_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_43_cast_fp16 = transpose(perm = v_43_perm_0, x = var_1903_cast_fp16)[name = tensor<string, []>("transpose_135")];
tensor<fp16, [1, 1, 1, 128]> out_43_cast_fp16 = matmul(transpose_x = out_43_transpose_x_0, transpose_y = out_43_transpose_y_0, x = attn_87_cast_fp16, y = v_43_cast_fp16)[name = tensor<string, []>("out_43_cast_fp16")];
tensor<int32, [4]> var_1918_perm_0 = const()[name = tensor<string, []>("op_1918_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1919 = const()[name = tensor<string, []>("op_1919"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1918_cast_fp16 = transpose(perm = var_1918_perm_0, x = out_43_cast_fp16)[name = tensor<string, []>("transpose_132")];
tensor<fp16, [1, 1, 128]> input_213_cast_fp16 = reshape(shape = var_1919, x = var_1918_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
tensor<fp16, [768, 128]> layers_10_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157958144)))];
tensor<fp16, [1, 1, 768]> linear_54_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_cross_attn_o_proj_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("linear_54_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_215_cast_fp16 = add(x = input_209_cast_fp16, y = linear_54_cast_fp16)[name = tensor<string, []>("input_215_cast_fp16")];
tensor<int32, [1]> x_105_axes_0 = const()[name = tensor<string, []>("x_105_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_10_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158154816)))];
tensor<fp16, [1, 1, 768]> x_105_cast_fp16 = layer_norm(axes = x_105_axes_0, epsilon = var_1797_to_fp16, gamma = layers_10_norm_ff_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("x_105_cast_fp16")];
tensor<int32, [3]> input_217_perm_0 = const()[name = tensor<string, []>("input_217_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_219_pad_type_0 = const()[name = tensor<string, []>("input_219_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_219_strides_0 = const()[name = tensor<string, []>("input_219_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_219_pad_0 = const()[name = tensor<string, []>("input_219_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_219_dilations_0 = const()[name = tensor<string, []>("input_219_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_219_groups_0 = const()[name = tensor<string, []>("input_219_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_10_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158156416)))];
tensor<fp16, [1, 768, 1]> input_217_cast_fp16 = transpose(perm = input_217_perm_0, x = x_105_cast_fp16)[name = tensor<string, []>("transpose_131")];
tensor<fp16, [1, 3072, 1]> input_219_cast_fp16 = conv(dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = layers_10_ffn_conv1_weight_to_fp16, x = input_217_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
tensor<string, []> input_221_mode_0 = const()[name = tensor<string, []>("input_221_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor<string, []>("input_221_cast_fp16")];
tensor<string, []> x_107_pad_type_0 = const()[name = tensor<string, []>("x_107_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_107_strides_0 = const()[name = tensor<string, []>("x_107_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_107_pad_0 = const()[name = tensor<string, []>("x_107_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_107_dilations_0 = const()[name = tensor<string, []>("x_107_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_107_groups_0 = const()[name = tensor<string, []>("x_107_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_10_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_10_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162875072)))];
tensor<fp16, [1, 768, 1]> x_107_cast_fp16 = conv(dilations = x_107_dilations_0, groups = x_107_groups_0, pad = x_107_pad_0, pad_type = x_107_pad_type_0, strides = x_107_strides_0, weight = layers_10_ffn_conv2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor<string, []>("x_107_cast_fp16")];
tensor<int32, [3]> x_109_perm_0 = const()[name = tensor<string, []>("x_109_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_109_cast_fp16 = transpose(perm = x_109_perm_0, x = x_107_cast_fp16)[name = tensor<string, []>("transpose_130")];
tensor<fp16, [1, 1, 768]> input_223_cast_fp16 = add(x = input_215_cast_fp16, y = x_109_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
tensor<int32, []> var_1964 = const()[name = tensor<string, []>("op_1964"), val = tensor<int32, []>(-1)];
tensor<int32, [1]> x_111_axes_0 = const()[name = tensor<string, []>("x_111_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_11_norm_sa_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_norm_sa_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167593728)))];
tensor<fp16, []> var_1967_to_fp16 = const()[name = tensor<string, []>("op_1967_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_111_cast_fp16 = layer_norm(axes = x_111_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_sa_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("x_111_cast_fp16")];
tensor<fp16, [2304, 768]> layers_11_self_attn_qkv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_qkv_proj_weight_to_fp16"), val = tensor<fp16, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167595328)))];
tensor<fp16, [1, 1, 2304]> linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_qkv_proj_weight_to_fp16, x = x_111_cast_fp16)[name = tensor<string, []>("linear_55_cast_fp16")];
tensor<int32, [5]> var_1986 = const()[name = tensor<string, []>("op_1986"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_cast_fp16 = reshape(shape = var_1986, x = linear_55_cast_fp16)[name = tensor<string, []>("qkv_cast_fp16")];
tensor<int32, [5]> q_45_begin_0 = const()[name = tensor<string, []>("q_45_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_45_end_0 = const()[name = tensor<string, []>("q_45_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_45_end_mask_0 = const()[name = tensor<string, []>("q_45_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_45_squeeze_mask_0 = const()[name = tensor<string, []>("q_45_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_45_cast_fp16 = slice_by_index(begin = q_45_begin_0, end = q_45_end_0, end_mask = q_45_end_mask_0, squeeze_mask = q_45_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("q_45_cast_fp16")];
tensor<int32, [5]> k_45_begin_0 = const()[name = tensor<string, []>("k_45_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> k_45_end_0 = const()[name = tensor<string, []>("k_45_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> k_45_end_mask_0 = const()[name = tensor<string, []>("k_45_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> k_45_squeeze_mask_0 = const()[name = tensor<string, []>("k_45_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> k_45_cast_fp16 = slice_by_index(begin = k_45_begin_0, end = k_45_end_0, end_mask = k_45_end_mask_0, squeeze_mask = k_45_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("k_45_cast_fp16")];
tensor<int32, [5]> v_45_begin_0 = const()[name = tensor<string, []>("v_45_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> v_45_end_0 = const()[name = tensor<string, []>("v_45_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> v_45_end_mask_0 = const()[name = tensor<string, []>("v_45_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> v_45_squeeze_mask_0 = const()[name = tensor<string, []>("v_45_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> v_45_cast_fp16 = slice_by_index(begin = v_45_begin_0, end = v_45_end_0, end_mask = v_45_end_mask_0, squeeze_mask = v_45_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("v_45_cast_fp16")];
tensor<bool, [512]> var_1998_cast_fp16 = equal(x = positions_range_1_promoted_to_fp16, y = position11)[name = tensor<string, []>("op_1998_cast_fp16")];
tensor<int32, [4]> var_2000 = const()[name = tensor<string, []>("op_2000"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<string, []> var_1999_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_1999_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_1998_cast_fp16_to_fp16 = cast(dtype = var_1999_to_fp16_dtype_0, x = var_1998_cast_fp16)[name = tensor<string, []>("cast_163")];
tensor<fp16, [1, 512, 1, 1]> mask_cast_fp16 = reshape(shape = var_2000, x = var_1998_cast_fp16_to_fp16)[name = tensor<string, []>("mask_cast_fp16")];
tensor<int32, [4]> k_new_reps_0 = const()[name = tensor<string, []>("k_new_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> k_new_cast_fp16 = tile(reps = k_new_reps_0, x = k_45_cast_fp16)[name = tensor<string, []>("k_new_cast_fp16")];
tensor<int32, [4]> v_new_reps_0 = const()[name = tensor<string, []>("v_new_reps_0"), val = tensor<int32, [4]>([1, 512, 1, 1])];
tensor<fp16, [1, 512, 12, 64]> v_new_cast_fp16 = tile(reps = v_new_reps_0, x = v_45_cast_fp16)[name = tensor<string, []>("v_new_cast_fp16")];
tensor<fp16, []> var_1962_to_fp16 = const()[name = tensor<string, []>("op_1962_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1, 512, 1, 1]> var_2006_cast_fp16 = sub(x = var_1962_to_fp16, y = mask_cast_fp16)[name = tensor<string, []>("op_2006_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_2007_cast_fp16 = mul(x = cache_k11, y = var_2006_cast_fp16)[name = tensor<string, []>("op_2007_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_2008_cast_fp16 = mul(x = k_new_cast_fp16, y = mask_cast_fp16)[name = tensor<string, []>("op_2008_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_k = add(x = var_2007_cast_fp16, y = var_2008_cast_fp16)[name = tensor<string, []>("new_k_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_2011_cast_fp16 = mul(x = cache_v11, y = var_2006_cast_fp16)[name = tensor<string, []>("op_2011_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> var_2012_cast_fp16 = mul(x = v_new_cast_fp16, y = mask_cast_fp16)[name = tensor<string, []>("op_2012_cast_fp16")];
tensor<fp16, [1, 512, 12, 64]> new_v = add(x = var_2011_cast_fp16, y = var_2012_cast_fp16)[name = tensor<string, []>("new_v_cast_fp16")];
tensor<bool, [512]> var_2014_cast_fp16 = less_equal(x = positions_range_1_promoted_to_fp16, y = position11)[name = tensor<string, []>("op_2014_cast_fp16")];
tensor<int32, [4]> var_2016 = const()[name = tensor<string, []>("op_2016"), val = tensor<int32, [4]>([1, 1, 1, 512])];
tensor<string, []> var_2015_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_2015_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
tensor<fp16, [512]> var_2014_cast_fp16_to_fp16 = cast(dtype = var_2015_to_fp16_dtype_0, x = var_2014_cast_fp16)[name = tensor<string, []>("cast_162")];
tensor<fp16, [1, 1, 1, 512]> var_2017_cast_fp16 = reshape(shape = var_2016, x = var_2014_cast_fp16_to_fp16)[name = tensor<string, []>("op_2017_cast_fp16")];
tensor<int32, [4]> var_2021 = const()[name = tensor<string, []>("op_2021"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_2024_transpose_x_0 = const()[name = tensor<string, []>("op_2024_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_2024_transpose_y_0 = const()[name = tensor<string, []>("op_2024_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_116_perm_0 = const()[name = tensor<string, []>("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_117_perm_0 = const()[name = tensor<string, []>("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 512]> transpose_117 = transpose(perm = transpose_117_perm_0, x = new_k)[name = tensor<string, []>("transpose_127")];
tensor<fp16, [1, 12, 1, 64]> transpose_116 = transpose(perm = transpose_116_perm_0, x = q_45_cast_fp16)[name = tensor<string, []>("transpose_128")];
tensor<fp16, [1, 12, 1, 512]> var_2024_cast_fp16 = matmul(transpose_x = var_2024_transpose_x_0, transpose_y = var_2024_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor<string, []>("op_2024_cast_fp16")];
tensor<fp16, []> var_2025_to_fp16 = const()[name = tensor<string, []>("op_2025_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
tensor<fp16, [1, 12, 1, 512]> attn_89_cast_fp16 = mul(x = var_2024_cast_fp16, y = var_2025_to_fp16)[name = tensor<string, []>("attn_89_cast_fp16")];
tensor<fp16, [1, 1, 1, 512]> var_2027_cast_fp16 = sub(x = var_1962_to_fp16, y = var_2017_cast_fp16)[name = tensor<string, []>("op_2027_cast_fp16")];
tensor<fp16, []> var_2028_to_fp16 = const()[name = tensor<string, []>("op_2028_to_fp16"), val = tensor<fp16, []>(-0x1.d4cp+14)];
tensor<fp16, [1, 1, 1, 512]> var_2029_cast_fp16 = mul(x = var_2027_cast_fp16, y = var_2028_to_fp16)[name = tensor<string, []>("op_2029_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> input_225_cast_fp16 = add(x = attn_89_cast_fp16, y = var_2029_cast_fp16)[name = tensor<string, []>("input_225_cast_fp16")];
tensor<fp16, [1, 12, 1, 512]> attn_91_cast_fp16 = softmax(axis = var_1964, x = input_225_cast_fp16)[name = tensor<string, []>("attn_91_cast_fp16")];
tensor<bool, []> out_45_transpose_x_0 = const()[name = tensor<string, []>("out_45_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_45_transpose_y_0 = const()[name = tensor<string, []>("out_45_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 12, 512, 64]> v4_cast_fp16 = transpose(perm = var_2021, x = new_v)[name = tensor<string, []>("transpose_129")];
tensor<fp16, [1, 12, 1, 64]> out_45_cast_fp16 = matmul(transpose_x = out_45_transpose_x_0, transpose_y = out_45_transpose_y_0, x = attn_91_cast_fp16, y = v4_cast_fp16)[name = tensor<string, []>("out_45_cast_fp16")];
tensor<int32, [4]> var_2033_perm_0 = const()[name = tensor<string, []>("op_2033_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2034 = const()[name = tensor<string, []>("op_2034"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_2033_cast_fp16 = transpose(perm = var_2033_perm_0, x = out_45_cast_fp16)[name = tensor<string, []>("transpose_126")];
tensor<fp16, [1, 1, 768]> input_227_cast_fp16 = reshape(shape = var_2034, x = var_2033_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
tensor<fp16, [768, 768]> layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171134336)))];
tensor<fp16, [1, 1, 768]> linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_227_cast_fp16)[name = tensor<string, []>("linear_56_cast_fp16")];
tensor<fp16, []> var_2038_to_fp16 = const()[name = tensor<string, []>("op_2038_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
tensor<fp16, [1]> var_2039 = add(x = position11, y = var_2038_to_fp16)[name = tensor<string, []>("op_2039_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_229_cast_fp16 = add(x = input_223_cast_fp16, y = linear_56_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
tensor<int32, [1]> x_113_axes_0 = const()[name = tensor<string, []>("x_113_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_11_norm_xa_query_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_norm_xa_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172314048)))];
tensor<fp16, [1, 1, 768]> x_113_cast_fp16 = layer_norm(axes = x_113_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_xa_query_weight_to_fp16, x = input_229_cast_fp16)[name = tensor<string, []>("x_113_cast_fp16")];
tensor<int32, [1]> memory_axes_0 = const()[name = tensor<string, []>("memory_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_11_norm_xa_memory_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_norm_xa_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172315648)))];
tensor<fp16, [1, 256, 768]> memory_cast_fp16 = layer_norm(axes = memory_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_xa_memory_weight_to_fp16, x = encoder_output)[name = tensor<string, []>("memory_cast_fp16")];
tensor<fp16, [128, 768]> layers_11_cross_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_cross_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172317248)))];
tensor<fp16, [1, 1, 128]> linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = layers_11_cross_attn_q_proj_weight_to_fp16, x = x_113_cast_fp16)[name = tensor<string, []>("linear_57_cast_fp16")];
tensor<int32, [4]> var_2060 = const()[name = tensor<string, []>("op_2060"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> var_2061_cast_fp16 = reshape(shape = var_2060, x = linear_57_cast_fp16)[name = tensor<string, []>("op_2061_cast_fp16")];
tensor<fp16, [256, 768]> layers_11_cross_attn_kv_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_cross_attn_kv_proj_weight_to_fp16"), val = tensor<fp16, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172513920)))];
tensor<fp16, [1, 256, 256]> linear_58_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = layers_11_cross_attn_kv_proj_weight_to_fp16, x = memory_cast_fp16)[name = tensor<string, []>("linear_58_cast_fp16")];
tensor<int32, [5]> var_2065 = const()[name = tensor<string, []>("op_2065"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<fp16, [1, 256, 2, 1, 128]> kv_cast_fp16 = reshape(shape = var_2065, x = linear_58_cast_fp16)[name = tensor<string, []>("kv_cast_fp16")];
tensor<int32, [5]> var_2069_begin_0 = const()[name = tensor<string, []>("op_2069_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> var_2069_end_0 = const()[name = tensor<string, []>("op_2069_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])];
tensor<bool, [5]> var_2069_end_mask_0 = const()[name = tensor<string, []>("op_2069_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_2069_squeeze_mask_0 = const()[name = tensor<string, []>("op_2069_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_2069_cast_fp16 = slice_by_index(begin = var_2069_begin_0, end = var_2069_end_0, end_mask = var_2069_end_mask_0, squeeze_mask = var_2069_squeeze_mask_0, x = kv_cast_fp16)[name = tensor<string, []>("op_2069_cast_fp16")];
tensor<int32, [5]> var_2073_begin_0 = const()[name = tensor<string, []>("op_2073_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> var_2073_end_0 = const()[name = tensor<string, []>("op_2073_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])];
tensor<bool, [5]> var_2073_end_mask_0 = const()[name = tensor<string, []>("op_2073_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> var_2073_squeeze_mask_0 = const()[name = tensor<string, []>("op_2073_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 256, 1, 128]> var_2073_cast_fp16 = slice_by_index(begin = var_2073_begin_0, end = var_2073_end_0, end_mask = var_2073_end_mask_0, squeeze_mask = var_2073_squeeze_mask_0, x = kv_cast_fp16)[name = tensor<string, []>("op_2073_cast_fp16")];
tensor<int32, [4]> v_perm_0 = const()[name = tensor<string, []>("v_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<bool, []> var_2076_transpose_x_0 = const()[name = tensor<string, []>("op_2076_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> var_2076_transpose_y_0 = const()[name = tensor<string, []>("op_2076_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<int32, [4]> transpose_118_perm_0 = const()[name = tensor<string, []>("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_119_perm_0 = const()[name = tensor<string, []>("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 1, 128, 256]> transpose_119 = transpose(perm = transpose_119_perm_0, x = var_2069_cast_fp16)[name = tensor<string, []>("transpose_123")];
tensor<fp16, [1, 1, 1, 128]> transpose_118 = transpose(perm = transpose_118_perm_0, x = var_2061_cast_fp16)[name = tensor<string, []>("transpose_124")];
tensor<fp16, [1, 1, 1, 256]> var_2076_cast_fp16 = matmul(transpose_x = var_2076_transpose_x_0, transpose_y = var_2076_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor<string, []>("op_2076_cast_fp16")];
tensor<fp16, []> var_2077_to_fp16 = const()[name = tensor<string, []>("op_2077_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> attn_93_cast_fp16 = mul(x = var_2076_cast_fp16, y = var_2077_to_fp16)[name = tensor<string, []>("attn_93_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> input_231_cast_fp16 = add(x = attn_93_cast_fp16, y = var_214_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
tensor<fp16, [1, 1, 1, 256]> attn_cast_fp16 = softmax(axis = var_1964, x = input_231_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
tensor<bool, []> out_transpose_x_0 = const()[name = tensor<string, []>("out_transpose_x_0"), val = tensor<bool, []>(false)];
tensor<bool, []> out_transpose_y_0 = const()[name = tensor<string, []>("out_transpose_y_0"), val = tensor<bool, []>(false)];
tensor<fp16, [1, 1, 256, 128]> v_cast_fp16 = transpose(perm = v_perm_0, x = var_2073_cast_fp16)[name = tensor<string, []>("transpose_125")];
tensor<fp16, [1, 1, 1, 128]> out_cast_fp16 = matmul(transpose_x = out_transpose_x_0, transpose_y = out_transpose_y_0, x = attn_cast_fp16, y = v_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
tensor<int32, [4]> var_2088_perm_0 = const()[name = tensor<string, []>("op_2088_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2089 = const()[name = tensor<string, []>("op_2089"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_2088_cast_fp16 = transpose(perm = var_2088_perm_0, x = out_cast_fp16)[name = tensor<string, []>("transpose_122")];
tensor<fp16, [1, 1, 128]> input_233_cast_fp16 = reshape(shape = var_2089, x = var_2088_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
tensor<fp16, [768, 128]> layers_11_cross_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_cross_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172907200)))];
tensor<fp16, [1, 1, 768]> linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_cross_attn_o_proj_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("linear_59_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_235_cast_fp16 = add(x = input_229_cast_fp16, y = linear_59_cast_fp16)[name = tensor<string, []>("input_235_cast_fp16")];
tensor<int32, [1]> x_115_axes_0 = const()[name = tensor<string, []>("x_115_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> layers_11_norm_ff_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_norm_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(173103872)))];
tensor<fp16, [1, 1, 768]> x_115_cast_fp16 = layer_norm(axes = x_115_axes_0, epsilon = var_1967_to_fp16, gamma = layers_11_norm_ff_weight_to_fp16, x = input_235_cast_fp16)[name = tensor<string, []>("x_115_cast_fp16")];
tensor<int32, [3]> input_237_perm_0 = const()[name = tensor<string, []>("input_237_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<string, []> input_239_pad_type_0 = const()[name = tensor<string, []>("input_239_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> input_239_strides_0 = const()[name = tensor<string, []>("input_239_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> input_239_pad_0 = const()[name = tensor<string, []>("input_239_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> input_239_dilations_0 = const()[name = tensor<string, []>("input_239_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> input_239_groups_0 = const()[name = tensor<string, []>("input_239_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [3072, 768, 1]> layers_11_ffn_conv1_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_ffn_conv1_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(173105472)))];
tensor<fp16, [1, 768, 1]> input_237_cast_fp16 = transpose(perm = input_237_perm_0, x = x_115_cast_fp16)[name = tensor<string, []>("transpose_121")];
tensor<fp16, [1, 3072, 1]> input_239_cast_fp16 = conv(dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = layers_11_ffn_conv1_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
tensor<string, []> input_241_mode_0 = const()[name = tensor<string, []>("input_241_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> input_241_cast_fp16 = gelu(mode = input_241_mode_0, x = input_239_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
tensor<string, []> x_117_pad_type_0 = const()[name = tensor<string, []>("x_117_pad_type_0"), val = tensor<string, []>("valid")];
tensor<int32, [1]> x_117_strides_0 = const()[name = tensor<string, []>("x_117_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> x_117_pad_0 = const()[name = tensor<string, []>("x_117_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> x_117_dilations_0 = const()[name = tensor<string, []>("x_117_dilations_0"), val = tensor<int32, [1]>([1])];
tensor<int32, []> x_117_groups_0 = const()[name = tensor<string, []>("x_117_groups_0"), val = tensor<int32, []>(1)];
tensor<fp16, [768, 3072, 1]> layers_11_ffn_conv2_weight_to_fp16 = const()[name = tensor<string, []>("layers_11_ffn_conv2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177824128)))];
tensor<fp16, [1, 768, 1]> x_117_cast_fp16 = conv(dilations = x_117_dilations_0, groups = x_117_groups_0, pad = x_117_pad_0, pad_type = x_117_pad_type_0, strides = x_117_strides_0, weight = layers_11_ffn_conv2_weight_to_fp16, x = input_241_cast_fp16)[name = tensor<string, []>("x_117_cast_fp16")];
tensor<int32, [3]> x_perm_0 = const()[name = tensor<string, []>("x_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> x_cast_fp16 = transpose(perm = x_perm_0, x = x_117_cast_fp16)[name = tensor<string, []>("transpose_120")];
tensor<fp16, [1, 1, 768]> input_243_cast_fp16 = add(x = input_235_cast_fp16, y = x_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> norm_out_weight_to_fp16 = const()[name = tensor<string, []>("norm_out_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(182542784)))];
tensor<fp16, []> var_2121_to_fp16 = const()[name = tensor<string, []>("op_2121_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input = layer_norm(axes = input_axes_0, epsilon = var_2121_to_fp16, gamma = norm_out_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
tensor<fp16, [16192, 768]> final_proj_weight_to_fp16 = const()[name = tensor<string, []>("final_proj_weight_to_fp16"), val = tensor<fp16, [16192, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(182544384)))];
tensor<fp16, [16192]> final_proj_bias_to_fp16 = const()[name = tensor<string, []>("final_proj_bias_to_fp16"), val = tensor<fp16, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207415360)))];
tensor<fp16, [1, 1, 16192]> var_2129 = linear(bias = final_proj_bias_to_fp16, weight = final_proj_weight_to_fp16, x = input)[name = tensor<string, []>("linear_60_cast_fp16")];
} -> (var_2129, input, new_k_1, new_v_1, var_169, new_k_3, new_v_3, var_339, new_k_5, new_v_5, var_509, new_k_7, new_v_7, var_679, new_k_9, new_v_9, var_849, new_k_11, new_v_11, var_1019, new_k_13, new_v_13, var_1189, new_k_15, new_v_15, var_1359, new_k_17, new_v_17, var_1529, new_k_19, new_v_19, var_1699, new_k_21, new_v_21, var_1869, new_k, new_v, var_2039);
}