aufklarer's picture
decoder_step_stateful: KV cache held as Core ML state (iOS 18+, ANE-resident, no per-step I/O transfer)
156baee verified
program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})]
{
func main<ios18>(tensor<fp32, [1, 1, 768]> audio_emb, tensor<fp32, [1, 256]> encoder_mask, tensor<fp32, [1, 256, 768]> encoder_output, tensor<int32, [1]> position, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_0, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_1, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_10, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_11, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_2, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_3, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_4, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_5, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_6, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_7, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_8, state<tensor<fp16, [1, 600, 12, 64]>> sa_k_9, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_0, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_1, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_10, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_11, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_2, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_3, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_4, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_5, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_6, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_7, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_8, state<tensor<fp16, [1, 600, 12, 64]>> sa_v_9, tensor<fp32, [1, 256, 1, 128]> xa_k_0, tensor<fp32, [1, 256, 1, 128]> xa_k_1, tensor<fp32, [1, 256, 1, 128]> xa_k_10, tensor<fp32, [1, 256, 1, 128]> xa_k_11, tensor<fp32, [1, 256, 1, 128]> xa_k_2, tensor<fp32, [1, 256, 1, 128]> xa_k_3, tensor<fp32, [1, 256, 1, 128]> xa_k_4, tensor<fp32, [1, 256, 1, 128]> xa_k_5, tensor<fp32, [1, 256, 1, 128]> xa_k_6, tensor<fp32, [1, 256, 1, 128]> xa_k_7, tensor<fp32, [1, 256, 1, 128]> xa_k_8, tensor<fp32, [1, 256, 1, 128]> xa_k_9, tensor<fp32, [1, 256, 1, 128]> xa_v_0, tensor<fp32, [1, 256, 1, 128]> xa_v_1, tensor<fp32, [1, 256, 1, 128]> xa_v_10, tensor<fp32, [1, 256, 1, 128]> xa_v_11, tensor<fp32, [1, 256, 1, 128]> xa_v_2, tensor<fp32, [1, 256, 1, 128]> xa_v_3, tensor<fp32, [1, 256, 
1, 128]> xa_v_4, tensor<fp32, [1, 256, 1, 128]> xa_v_5, tensor<fp32, [1, 256, 1, 128]> xa_v_6, tensor<fp32, [1, 256, 1, 128]> xa_v_7, tensor<fp32, [1, 256, 1, 128]> xa_v_8, tensor<fp32, [1, 256, 1, 128]> xa_v_9) {
int32 var_502_batch_dims_0 = const()[name = string("op_502_batch_dims_0"), val = int32(0)];
bool var_502_validate_indices_0 = const()[name = string("op_502_validate_indices_0"), val = bool(false)];
tensor<fp16, [2048, 768]> dec_position_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor<fp16, [2048, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("dec_position_embeddings_weight_to_fp16_quantized")];
string position_to_int16_dtype_0 = const()[name = string("position_to_int16_dtype_0"), val = string("int16")];
string cast_111_dtype_0 = const()[name = string("cast_111_dtype_0"), val = string("int32")];
int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
tensor<int16, [1]> position_to_int16 = cast(dtype = position_to_int16_dtype_0, x = position)[name = string("cast_35")];
tensor<int32, [1]> cast_111 = cast(dtype = cast_111_dtype_0, x = position_to_int16)[name = string("cast_34")];
tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_111, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(2048)];
tensor<int32, [1]> add_0 = add(x = cast_111, y = slice_by_index_0)[name = string("add_0")];
tensor<int32, [1]> select_0 = select(a = cast_111, b = add_0, cond = greater_equal_0)[name = string("select_0")];
string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")];
int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)];
tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_33")];
tensor<int32, [1]> cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_32")];
tensor<bool, [1]> greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")];
int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(2048)];
tensor<int32, [1]> add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")];
tensor<int32, [1]> select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")];
int32 op_502_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_502_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(0)];
tensor<fp16, [1, 768]> op_502_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_502_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_502_batch_dims_0, indices = select_0_1, validate_indices = var_502_validate_indices_0, x = dec_position_embeddings_weight_to_fp16_quantized)[name = string("op_502_cast_fp16_cast_uint16_cast_uint16")];
string audio_emb_to_fp16_dtype_0 = const()[name = string("audio_emb_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [1, 1, 768]> audio_emb_to_fp16 = cast(dtype = audio_emb_to_fp16_dtype_0, x = audio_emb)[name = string("cast_31")];
tensor<fp16, [1, 1, 768]> input_3_cast_fp16 = add(x = audio_emb_to_fp16, y = op_502_cast_fp16_cast_uint16_cast_uint16)[name = string("input_3_cast_fp16")];
tensor<fp16, [600]> idx_range_promoted_to_fp16 = const()[name = string("idx_range_promoted_to_fp16"), val = tensor<fp16, [600]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1577152)))];
string var_515_to_fp16_dtype_0 = const()[name = string("op_515_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [1]> position_to_fp16 = cast(dtype = var_515_to_fp16_dtype_0, x = position)[name = string("cast_30")];
tensor<bool, [600]> var_516_cast_fp16 = less_equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = string("op_516_cast_fp16")];
tensor<int32, [1]> sa_key_mask_axes_0 = const()[name = string("sa_key_mask_axes_0"), val = tensor<int32, [1]>([0])];
string sa_key_mask_1_to_fp16_dtype_0 = const()[name = string("sa_key_mask_1_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [600]> var_516_cast_fp16_to_fp16 = cast(dtype = sa_key_mask_1_to_fp16_dtype_0, x = var_516_cast_fp16)[name = string("cast_29")];
tensor<fp16, [1, 600]> sa_key_mask_cast_fp16 = expand_dims(axes = sa_key_mask_axes_0, x = var_516_cast_fp16_to_fp16)[name = string("sa_key_mask_cast_fp16")];
tensor<int32, [1]> input_5_axes_0 = const()[name = string("input_5_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_0_norm_self_weight_to_fp16 = const()[name = string("dec_layers_0_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578432)))];
fp16 var_525_to_fp16 = const()[name = string("op_525_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, epsilon = var_525_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1580032))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3349568))))[name = string("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [2304]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2304]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3354240)))];
tensor<fp16, [1, 1, 2304]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
tensor<int32, [5]> var_539 = const()[name = string("op_539"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_3_cast_fp16 = reshape(shape = var_539, x = linear_0_cast_fp16)[name = string("qkv_3_cast_fp16")];
tensor<int32, [5]> q_1_begin_0 = const()[name = string("q_1_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_1_end_0 = const()[name = string("q_1_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_1_end_mask_0 = const()[name = string("q_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_1_squeeze_mask_0 = const()[name = string("q_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("q_1_cast_fp16")];
tensor<int32, [5]> new_k_1_begin_0 = const()[name = string("new_k_1_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_1_end_0 = const()[name = string("new_k_1_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_1_end_mask_0 = const()[name = string("new_k_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_1_squeeze_mask_0 = const()[name = string("new_k_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("new_k_1_cast_fp16")];
tensor<int32, [5]> new_v_1_begin_0 = const()[name = string("new_v_1_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_1_end_0 = const()[name = string("new_v_1_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_1_end_mask_0 = const()[name = string("new_v_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_1_squeeze_mask_0 = const()[name = string("new_v_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("new_v_1_cast_fp16")];
tensor<bool, [600]> var_585_cast_fp16 = equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = string("op_585_cast_fp16")];
string write_oh_1_dtype_0 = const()[name = string("write_oh_1_dtype_0"), val = string("fp16")];
tensor<int32, [4]> var_595 = const()[name = string("op_595"), val = tensor<int32, [4]>([1, 600, 1, 1])];
tensor<fp16, [600]> write_oh_1 = cast(dtype = write_oh_1_dtype_0, x = var_585_cast_fp16)[name = string("cast_28")];
tensor<fp16, [1, 600, 1, 1]> write_oh_b_1 = reshape(shape = var_595, x = write_oh_1)[name = string("write_oh_b_1")];
tensor<fp16, [1, 600, 12, 64]> read_state_0 = read_state(input = sa_k_0)[name = string("read_state_0")];
tensor<fp16, [1, 600, 12, 64]> var_608_cast_fp16 = sub(x = new_k_1_cast_fp16, y = read_state_0)[name = string("op_608_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_609_cast_fp16 = mul(x = var_608_cast_fp16, y = write_oh_b_1)[name = string("op_609_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_3_cast_fp16 = add(x = read_state_0, y = var_609_cast_fp16)[name = string("sa_k_buf_3_cast_fp16")];
write_state(data = sa_k_buf_3_cast_fp16, input = sa_k_0)[name = string("coreml_update_state_24_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_24 = read_state(input = sa_k_0)[name = string("coreml_update_state_24")];
tensor<fp16, [1, 600, 12, 64]> read_state_1 = read_state(input = sa_v_0)[name = string("read_state_1")];
tensor<fp16, [1, 600, 12, 64]> var_613_cast_fp16 = sub(x = new_v_1_cast_fp16, y = read_state_1)[name = string("op_613_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_614_cast_fp16 = mul(x = var_613_cast_fp16, y = write_oh_b_1)[name = string("op_614_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_3_cast_fp16 = add(x = read_state_1, y = var_614_cast_fp16)[name = string("sa_v_buf_3_cast_fp16")];
write_state(data = sa_v_buf_3_cast_fp16, input = sa_v_0)[name = string("coreml_update_state_25_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_25 = read_state(input = sa_v_0)[name = string("coreml_update_state_25")];
tensor<int32, [4]> var_643 = const()[name = string("op_643"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_645_transpose_x_0 = const()[name = string("op_645_transpose_x_0"), val = bool(false)];
bool var_645_transpose_y_0 = const()[name = string("op_645_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_97 = transpose(perm = transpose_97_perm_0, x = coreml_update_state_24)[name = string("transpose_262")];
tensor<fp16, [1, 12, 1, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = string("transpose_263")];
tensor<fp16, [1, 12, 1, 600]> var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_0, transpose_y = var_645_transpose_y_0, x = transpose_96, y = transpose_97)[name = string("op_645_cast_fp16")];
fp16 var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_1_cast_fp16 = mul(x = var_645_cast_fp16, y = var_646_to_fp16)[name = string("scores_1_cast_fp16")];
tensor<int32, [1]> var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [1, 1, 600]> var_654_cast_fp16 = expand_dims(axes = var_654_axes_0, x = sa_key_mask_cast_fp16)[name = string("op_654_cast_fp16")];
tensor<int32, [1]> var_656_axes_0 = const()[name = string("op_656_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 1, 1, 600]> var_656_cast_fp16 = expand_dims(axes = var_656_axes_0, x = var_654_cast_fp16)[name = string("op_656_cast_fp16")];
fp16 var_662_promoted_to_fp16 = const()[name = string("op_662_promoted_to_fp16"), val = fp16(0x0p+0)];
tensor<bool, [1, 1, 1, 600]> var_663_cast_fp16 = equal(x = var_656_cast_fp16, y = var_662_promoted_to_fp16)[name = string("op_663_cast_fp16")];
fp16 var_664_to_fp16 = const()[name = string("op_664_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_3_cast_fp16 = select(a = var_664_to_fp16, b = scores_1_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_3_cast_fp16")];
int32 var_666 = const()[name = string("op_666"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_1_cast_fp16 = softmax(axis = var_666, x = scores_3_cast_fp16)[name = string("probs_1_cast_fp16")];
bool var_669_transpose_x_0 = const()[name = string("op_669_transpose_x_0"), val = bool(false)];
bool var_669_transpose_y_0 = const()[name = string("op_669_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_1_cast_fp16 = transpose(perm = var_643, x = coreml_update_state_25)[name = string("transpose_261")];
tensor<fp16, [1, 12, 1, 64]> var_669_cast_fp16 = matmul(transpose_x = var_669_transpose_x_0, transpose_y = var_669_transpose_y_0, x = probs_1_cast_fp16, y = v_t_1_cast_fp16)[name = string("op_669_cast_fp16")];
tensor<int32, [4]> var_674 = const()[name = string("op_674"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_679 = const()[name = string("op_679"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_675_cast_fp16 = transpose(perm = var_674, x = var_669_cast_fp16)[name = string("transpose_260")];
tensor<fp16, [1, 1, 768]> input_7_cast_fp16 = reshape(shape = var_679, x = var_675_cast_fp16)[name = string("input_7_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3358912))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3948800))))[name = string("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3950400)))];
tensor<fp16, [1, 1, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_1_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = string("input_9_cast_fp16")];
tensor<int32, [1]> input_11_axes_0 = const()[name = string("input_11_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3952000)))];
fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_11_cast_fp16 = layer_norm(axes = input_11_axes_0, epsilon = var_687_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_9_cast_fp16)[name = string("input_11_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3953600))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4051968))))[name = string("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [128]> linear_2_bias_0_to_fp16 = const()[name = string("linear_2_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052288)))];
tensor<fp16, [1, 1, 128]> linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_2_cast_fp16")];
tensor<int32, [4]> var_700 = const()[name = string("op_700"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_1_cast_fp16 = reshape(shape = var_700, x = linear_2_cast_fp16)[name = string("xq_proj_1_cast_fp16")];
tensor<int32, [4]> var_718 = const()[name = string("op_718"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_0_to_fp16_dtype_0 = const()[name = string("xa_v_0_to_fp16_dtype_0"), val = string("fp16")];
bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)];
bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)];
string xa_k_0_to_fp16_dtype_0 = const()[name = string("xa_k_0_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_0_to_fp16 = cast(dtype = xa_k_0_to_fp16_dtype_0, x = xa_k_0)[name = string("cast_27")];
tensor<fp16, [1, 1, 128, 256]> transpose_99 = transpose(perm = transpose_99_perm_0, x = xa_k_0_to_fp16)[name = string("transpose_258")];
tensor<fp16, [1, 1, 1, 128]> transpose_98 = transpose(perm = transpose_98_perm_0, x = xq_proj_1_cast_fp16)[name = string("transpose_259")];
tensor<fp16, [1, 1, 1, 256]> var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = transpose_98, y = transpose_99)[name = string("op_720_cast_fp16")];
fp16 var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_1_cast_fp16 = mul(x = var_720_cast_fp16, y = var_721_to_fp16)[name = string("xscores_1_cast_fp16")];
tensor<int32, [1]> var_729_axes_0 = const()[name = string("op_729_axes_0"), val = tensor<int32, [1]>([1])];
string encoder_mask_to_fp16_dtype_0 = const()[name = string("encoder_mask_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [1, 256]> encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = string("cast_26")];
tensor<fp16, [1, 1, 256]> var_729_cast_fp16 = expand_dims(axes = var_729_axes_0, x = encoder_mask_to_fp16)[name = string("op_729_cast_fp16")];
tensor<int32, [1]> var_731_axes_0 = const()[name = string("op_731_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 1, 1, 256]> var_731_cast_fp16 = expand_dims(axes = var_731_axes_0, x = var_729_cast_fp16)[name = string("op_731_cast_fp16")];
fp16 var_737_promoted_to_fp16 = const()[name = string("op_737_promoted_to_fp16"), val = fp16(0x0p+0)];
tensor<bool, [1, 1, 1, 256]> var_738_cast_fp16 = equal(x = var_731_cast_fp16, y = var_737_promoted_to_fp16)[name = string("op_738_cast_fp16")];
fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_3_cast_fp16 = select(a = var_739_to_fp16, b = xscores_1_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_3_cast_fp16")];
int32 var_741 = const()[name = string("op_741"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_1_cast_fp16 = softmax(axis = var_741, x = xscores_3_cast_fp16)[name = string("xprobs_1_cast_fp16")];
bool var_744_transpose_x_0 = const()[name = string("op_744_transpose_x_0"), val = bool(false)];
bool var_744_transpose_y_0 = const()[name = string("op_744_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_0_to_fp16 = cast(dtype = xa_v_0_to_fp16_dtype_0, x = xa_v_0)[name = string("cast_25")];
tensor<fp16, [1, 1, 256, 128]> xvT_1_cast_fp16 = transpose(perm = var_718, x = xa_v_0_to_fp16)[name = string("transpose_257")];
tensor<fp16, [1, 1, 1, 128]> var_744_cast_fp16 = matmul(transpose_x = var_744_transpose_x_0, transpose_y = var_744_transpose_y_0, x = xprobs_1_cast_fp16, y = xvT_1_cast_fp16)[name = string("op_744_cast_fp16")];
tensor<int32, [4]> var_749 = const()[name = string("op_749"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_754 = const()[name = string("op_754"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_750_cast_fp16 = transpose(perm = var_749, x = var_744_cast_fp16)[name = string("transpose_256")];
tensor<fp16, [1, 1, 128]> input_13_cast_fp16 = reshape(shape = var_754, x = var_750_cast_fp16)[name = string("input_13_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052608))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4150976))))[name = string("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = string("linear_3_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_3_cast_fp16)[name = string("input_15_cast_fp16")];
tensor<int32, [1]> x_1_axes_0 = const()[name = string("x_1_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4152576)))];
fp16 var_762_to_fp16 = const()[name = string("op_762_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_762_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_15_cast_fp16)[name = string("x_1_cast_fp16")];
tensor<int32, [3]> var_778 = const()[name = string("op_778"), val = tensor<int32, [3]>([0, 2, 1])];
string y_1_pad_type_0 = const()[name = string("y_1_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_1_strides_0 = const()[name = string("y_1_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_1_pad_0 = const()[name = string("y_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_1_dilations_0 = const()[name = string("y_1_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_1_groups_0 = const()[name = string("y_1_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4154176))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6513536))))[name = string("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_3_cast_fp16 = transpose(perm = var_778, x = x_1_cast_fp16)[name = string("transpose_255")];
tensor<fp16, [1, 3072, 1]> y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_3_cast_fp16)[name = string("y_1_cast_fp16")];
string x_5_mode_0 = const()[name = string("x_5_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_5_cast_fp16 = gelu(mode = x_5_mode_0, x = y_1_cast_fp16)[name = string("x_5_cast_fp16")];
string y_3_pad_type_0 = const()[name = string("y_3_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_3_strides_0 = const()[name = string("y_3_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_3_pad_0 = const()[name = string("y_3_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_3_dilations_0 = const()[name = string("y_3_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_3_groups_0 = const()[name = string("y_3_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6519744))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8879104))))[name = string("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = string("y_3_cast_fp16")];
tensor<int32, [3]> var_796 = const()[name = string("op_796"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_797_cast_fp16 = transpose(perm = var_796, x = y_3_cast_fp16)[name = string("transpose_254")];
tensor<fp16, [1, 1, 768]> input_17_cast_fp16 = add(x = input_15_cast_fp16, y = var_797_cast_fp16)[name = string("input_17_cast_fp16")];
tensor<int32, [1]> input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_1_norm_self_weight_to_fp16 = const()[name = string("dec_layers_1_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8880704)))];
fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_19_cast_fp16 = layer_norm(axes = input_19_axes_0, epsilon = var_801_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8882304))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10651840))))[name = string("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_4_cast_fp16")];
tensor<int32, [5]> var_815 = const()[name = string("op_815"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_7_cast_fp16 = reshape(shape = var_815, x = linear_4_cast_fp16)[name = string("qkv_7_cast_fp16")];
tensor<int32, [5]> q_3_begin_0 = const()[name = string("q_3_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_3_end_0 = const()[name = string("q_3_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_3_end_mask_0 = const()[name = string("q_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_3_squeeze_mask_0 = const()[name = string("q_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_3_cast_fp16 = slice_by_index(begin = q_3_begin_0, end = q_3_end_0, end_mask = q_3_end_mask_0, squeeze_mask = q_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("q_3_cast_fp16")];
tensor<int32, [5]> new_k_3_begin_0 = const()[name = string("new_k_3_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_3_end_0 = const()[name = string("new_k_3_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_3_end_mask_0 = const()[name = string("new_k_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_3_squeeze_mask_0 = const()[name = string("new_k_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("new_k_3_cast_fp16")];
tensor<int32, [5]> new_v_3_begin_0 = const()[name = string("new_v_3_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_3_end_0 = const()[name = string("new_v_3_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_3_end_mask_0 = const()[name = string("new_v_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_3_squeeze_mask_0 = const()[name = string("new_v_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("new_v_3_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_2 = read_state(input = sa_k_1)[name = string("read_state_2")];
tensor<fp16, [1, 600, 12, 64]> var_884_cast_fp16 = sub(x = new_k_3_cast_fp16, y = read_state_2)[name = string("op_884_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_885_cast_fp16 = mul(x = var_884_cast_fp16, y = write_oh_b_1)[name = string("op_885_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_7_cast_fp16 = add(x = read_state_2, y = var_885_cast_fp16)[name = string("sa_k_buf_7_cast_fp16")];
write_state(data = sa_k_buf_7_cast_fp16, input = sa_k_1)[name = string("coreml_update_state_26_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_26 = read_state(input = sa_k_1)[name = string("coreml_update_state_26")];
tensor<fp16, [1, 600, 12, 64]> read_state_3 = read_state(input = sa_v_1)[name = string("read_state_3")];
tensor<fp16, [1, 600, 12, 64]> var_889_cast_fp16 = sub(x = new_v_3_cast_fp16, y = read_state_3)[name = string("op_889_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_890_cast_fp16 = mul(x = var_889_cast_fp16, y = write_oh_b_1)[name = string("op_890_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_7_cast_fp16 = add(x = read_state_3, y = var_890_cast_fp16)[name = string("sa_v_buf_7_cast_fp16")];
write_state(data = sa_v_buf_7_cast_fp16, input = sa_v_1)[name = string("coreml_update_state_27_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_27 = read_state(input = sa_v_1)[name = string("coreml_update_state_27")];
tensor<int32, [4]> var_919 = const()[name = string("op_919"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_921_transpose_x_0 = const()[name = string("op_921_transpose_x_0"), val = bool(false)];
bool var_921_transpose_y_0 = const()[name = string("op_921_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_101 = transpose(perm = transpose_101_perm_0, x = coreml_update_state_26)[name = string("transpose_252")];
tensor<fp16, [1, 12, 1, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = q_3_cast_fp16)[name = string("transpose_253")];
tensor<fp16, [1, 12, 1, 600]> var_921_cast_fp16 = matmul(transpose_x = var_921_transpose_x_0, transpose_y = var_921_transpose_y_0, x = transpose_100, y = transpose_101)[name = string("op_921_cast_fp16")];
fp16 var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_5_cast_fp16 = mul(x = var_921_cast_fp16, y = var_922_to_fp16)[name = string("scores_5_cast_fp16")];
fp16 var_940_to_fp16 = const()[name = string("op_940_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_7_cast_fp16 = select(a = var_940_to_fp16, b = scores_5_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_7_cast_fp16")];
int32 var_942 = const()[name = string("op_942"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_3_cast_fp16 = softmax(axis = var_942, x = scores_7_cast_fp16)[name = string("probs_3_cast_fp16")];
bool var_945_transpose_x_0 = const()[name = string("op_945_transpose_x_0"), val = bool(false)];
bool var_945_transpose_y_0 = const()[name = string("op_945_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_3_cast_fp16 = transpose(perm = var_919, x = coreml_update_state_27)[name = string("transpose_251")];
tensor<fp16, [1, 12, 1, 64]> var_945_cast_fp16 = matmul(transpose_x = var_945_transpose_x_0, transpose_y = var_945_transpose_y_0, x = probs_3_cast_fp16, y = v_t_3_cast_fp16)[name = string("op_945_cast_fp16")];
tensor<int32, [4]> var_950 = const()[name = string("op_950"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_955 = const()[name = string("op_955"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_951_cast_fp16 = transpose(perm = var_950, x = var_945_cast_fp16)[name = string("transpose_250")];
tensor<fp16, [1, 1, 768]> input_21_cast_fp16 = reshape(shape = var_955, x = var_951_cast_fp16)[name = string("input_21_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10656512))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11246400))))[name = string("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = string("linear_5_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_23_cast_fp16 = add(x = input_17_cast_fp16, y = linear_5_cast_fp16)[name = string("input_23_cast_fp16")];
tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11248000)))];
fp16 var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, epsilon = var_963_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11249600))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11347968))))[name = string("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_6_cast_fp16")];
tensor<int32, [4]> var_976 = const()[name = string("op_976"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_3_cast_fp16 = reshape(shape = var_976, x = linear_6_cast_fp16)[name = string("xq_proj_3_cast_fp16")];
tensor<int32, [4]> var_994 = const()[name = string("op_994"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_1_to_fp16_dtype_0 = const()[name = string("xa_v_1_to_fp16_dtype_0"), val = string("fp16")];
bool var_996_transpose_x_0 = const()[name = string("op_996_transpose_x_0"), val = bool(false)];
bool var_996_transpose_y_0 = const()[name = string("op_996_transpose_y_0"), val = bool(false)];
string xa_k_1_to_fp16_dtype_0 = const()[name = string("xa_k_1_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_1_to_fp16 = cast(dtype = xa_k_1_to_fp16_dtype_0, x = xa_k_1)[name = string("cast_24")];
tensor<fp16, [1, 1, 128, 256]> transpose_103 = transpose(perm = transpose_103_perm_0, x = xa_k_1_to_fp16)[name = string("transpose_248")];
tensor<fp16, [1, 1, 1, 128]> transpose_102 = transpose(perm = transpose_102_perm_0, x = xq_proj_3_cast_fp16)[name = string("transpose_249")];
tensor<fp16, [1, 1, 1, 256]> var_996_cast_fp16 = matmul(transpose_x = var_996_transpose_x_0, transpose_y = var_996_transpose_y_0, x = transpose_102, y = transpose_103)[name = string("op_996_cast_fp16")];
fp16 var_997_to_fp16 = const()[name = string("op_997_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_5_cast_fp16 = mul(x = var_996_cast_fp16, y = var_997_to_fp16)[name = string("xscores_5_cast_fp16")];
fp16 var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_7_cast_fp16 = select(a = var_1015_to_fp16, b = xscores_5_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_7_cast_fp16")];
int32 var_1017 = const()[name = string("op_1017"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_3_cast_fp16 = softmax(axis = var_1017, x = xscores_7_cast_fp16)[name = string("xprobs_3_cast_fp16")];
bool var_1020_transpose_x_0 = const()[name = string("op_1020_transpose_x_0"), val = bool(false)];
bool var_1020_transpose_y_0 = const()[name = string("op_1020_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_1_to_fp16 = cast(dtype = xa_v_1_to_fp16_dtype_0, x = xa_v_1)[name = string("cast_23")];
tensor<fp16, [1, 1, 256, 128]> xvT_3_cast_fp16 = transpose(perm = var_994, x = xa_v_1_to_fp16)[name = string("transpose_247")];
tensor<fp16, [1, 1, 1, 128]> var_1020_cast_fp16 = matmul(transpose_x = var_1020_transpose_x_0, transpose_y = var_1020_transpose_y_0, x = xprobs_3_cast_fp16, y = xvT_3_cast_fp16)[name = string("op_1020_cast_fp16")];
tensor<int32, [4]> var_1025 = const()[name = string("op_1025"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1030 = const()[name = string("op_1030"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1026_cast_fp16 = transpose(perm = var_1025, x = var_1020_cast_fp16)[name = string("transpose_246")];
tensor<fp16, [1, 1, 128]> input_27_cast_fp16 = reshape(shape = var_1030, x = var_1026_cast_fp16)[name = string("input_27_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11348288))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11446656))))[name = string("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_7_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_7_cast_fp16)[name = string("input_29_cast_fp16")];
tensor<int32, [1]> x_9_axes_0 = const()[name = string("x_9_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11448256)))];
fp16 var_1038_to_fp16 = const()[name = string("op_1038_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_9_cast_fp16 = layer_norm(axes = x_9_axes_0, epsilon = var_1038_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = string("x_9_cast_fp16")];
tensor<int32, [3]> var_1054 = const()[name = string("op_1054"), val = tensor<int32, [3]>([0, 2, 1])];
string y_5_pad_type_0 = const()[name = string("y_5_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_5_strides_0 = const()[name = string("y_5_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_5_pad_0 = const()[name = string("y_5_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_5_dilations_0 = const()[name = string("y_5_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_5_groups_0 = const()[name = string("y_5_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11449856))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13809216))))[name = string("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_11_cast_fp16 = transpose(perm = var_1054, x = x_9_cast_fp16)[name = string("transpose_245")];
tensor<fp16, [1, 3072, 1]> y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = string("y_5_cast_fp16")];
string x_13_mode_0 = const()[name = string("x_13_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_5_cast_fp16)[name = string("x_13_cast_fp16")];
string y_7_pad_type_0 = const()[name = string("y_7_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_7_strides_0 = const()[name = string("y_7_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_7_pad_0 = const()[name = string("y_7_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_7_dilations_0 = const()[name = string("y_7_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_7_groups_0 = const()[name = string("y_7_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13815424))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16174784))))[name = string("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = string("y_7_cast_fp16")];
tensor<int32, [3]> var_1072 = const()[name = string("op_1072"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_1073_cast_fp16 = transpose(perm = var_1072, x = y_7_cast_fp16)[name = string("transpose_244")];
tensor<fp16, [1, 1, 768]> input_31_cast_fp16 = add(x = input_29_cast_fp16, y = var_1073_cast_fp16)[name = string("input_31_cast_fp16")];
tensor<int32, [1]> input_33_axes_0 = const()[name = string("input_33_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_2_norm_self_weight_to_fp16 = const()[name = string("dec_layers_2_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16176384)))];
fp16 var_1077_to_fp16 = const()[name = string("op_1077_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_1077_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = string("input_33_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16177984))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17947520))))[name = string("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_8_cast_fp16")];
tensor<int32, [5]> var_1091 = const()[name = string("op_1091"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_11_cast_fp16 = reshape(shape = var_1091, x = linear_8_cast_fp16)[name = string("qkv_11_cast_fp16")];
tensor<int32, [5]> q_5_begin_0 = const()[name = string("q_5_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_5_end_0 = const()[name = string("q_5_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_5_end_mask_0 = const()[name = string("q_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_5_squeeze_mask_0 = const()[name = string("q_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = string("q_5_cast_fp16")];
tensor<int32, [5]> new_k_5_begin_0 = const()[name = string("new_k_5_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_5_end_0 = const()[name = string("new_k_5_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_5_end_mask_0 = const()[name = string("new_k_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_5_squeeze_mask_0 = const()[name = string("new_k_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = string("new_k_5_cast_fp16")];
tensor<int32, [5]> new_v_5_begin_0 = const()[name = string("new_v_5_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_5_end_0 = const()[name = string("new_v_5_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_5_end_mask_0 = const()[name = string("new_v_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_5_squeeze_mask_0 = const()[name = string("new_v_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = string("new_v_5_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_4 = read_state(input = sa_k_2)[name = string("read_state_4")];
tensor<fp16, [1, 600, 12, 64]> var_1160_cast_fp16 = sub(x = new_k_5_cast_fp16, y = read_state_4)[name = string("op_1160_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1161_cast_fp16 = mul(x = var_1160_cast_fp16, y = write_oh_b_1)[name = string("op_1161_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_11_cast_fp16 = add(x = read_state_4, y = var_1161_cast_fp16)[name = string("sa_k_buf_11_cast_fp16")];
write_state(data = sa_k_buf_11_cast_fp16, input = sa_k_2)[name = string("coreml_update_state_28_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_28 = read_state(input = sa_k_2)[name = string("coreml_update_state_28")];
tensor<fp16, [1, 600, 12, 64]> read_state_5 = read_state(input = sa_v_2)[name = string("read_state_5")];
tensor<fp16, [1, 600, 12, 64]> var_1165_cast_fp16 = sub(x = new_v_5_cast_fp16, y = read_state_5)[name = string("op_1165_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1166_cast_fp16 = mul(x = var_1165_cast_fp16, y = write_oh_b_1)[name = string("op_1166_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_11_cast_fp16 = add(x = read_state_5, y = var_1166_cast_fp16)[name = string("sa_v_buf_11_cast_fp16")];
write_state(data = sa_v_buf_11_cast_fp16, input = sa_v_2)[name = string("coreml_update_state_29_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_29 = read_state(input = sa_v_2)[name = string("coreml_update_state_29")];
tensor<int32, [4]> var_1195 = const()[name = string("op_1195"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_1197_transpose_x_0 = const()[name = string("op_1197_transpose_x_0"), val = bool(false)];
bool var_1197_transpose_y_0 = const()[name = string("op_1197_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_105 = transpose(perm = transpose_105_perm_0, x = coreml_update_state_28)[name = string("transpose_242")];
tensor<fp16, [1, 12, 1, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = q_5_cast_fp16)[name = string("transpose_243")];
tensor<fp16, [1, 12, 1, 600]> var_1197_cast_fp16 = matmul(transpose_x = var_1197_transpose_x_0, transpose_y = var_1197_transpose_y_0, x = transpose_104, y = transpose_105)[name = string("op_1197_cast_fp16")];
fp16 var_1198_to_fp16 = const()[name = string("op_1198_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_9_cast_fp16 = mul(x = var_1197_cast_fp16, y = var_1198_to_fp16)[name = string("scores_9_cast_fp16")];
fp16 var_1216_to_fp16 = const()[name = string("op_1216_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_11_cast_fp16 = select(a = var_1216_to_fp16, b = scores_9_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_11_cast_fp16")];
int32 var_1218 = const()[name = string("op_1218"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_5_cast_fp16 = softmax(axis = var_1218, x = scores_11_cast_fp16)[name = string("probs_5_cast_fp16")];
bool var_1221_transpose_x_0 = const()[name = string("op_1221_transpose_x_0"), val = bool(false)];
bool var_1221_transpose_y_0 = const()[name = string("op_1221_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_5_cast_fp16 = transpose(perm = var_1195, x = coreml_update_state_29)[name = string("transpose_241")];
tensor<fp16, [1, 12, 1, 64]> var_1221_cast_fp16 = matmul(transpose_x = var_1221_transpose_x_0, transpose_y = var_1221_transpose_y_0, x = probs_5_cast_fp16, y = v_t_5_cast_fp16)[name = string("op_1221_cast_fp16")];
tensor<int32, [4]> var_1226 = const()[name = string("op_1226"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1231 = const()[name = string("op_1231"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1227_cast_fp16 = transpose(perm = var_1226, x = var_1221_cast_fp16)[name = string("transpose_240")];
tensor<fp16, [1, 1, 768]> input_35_cast_fp16 = reshape(shape = var_1231, x = var_1227_cast_fp16)[name = string("input_35_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17952192))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18542080))))[name = string("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_9_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_37_cast_fp16 = add(x = input_31_cast_fp16, y = linear_9_cast_fp16)[name = string("input_37_cast_fp16")];
tensor<int32, [1]> input_39_axes_0 = const()[name = string("input_39_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18543680)))];
fp16 var_1239_to_fp16 = const()[name = string("op_1239_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_39_cast_fp16 = layer_norm(axes = input_39_axes_0, epsilon = var_1239_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18545280))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18643648))))[name = string("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_10_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_10_cast_fp16")];
tensor<int32, [4]> var_1252 = const()[name = string("op_1252"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_5_cast_fp16 = reshape(shape = var_1252, x = linear_10_cast_fp16)[name = string("xq_proj_5_cast_fp16")];
tensor<int32, [4]> var_1270 = const()[name = string("op_1270"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_2_to_fp16_dtype_0 = const()[name = string("xa_v_2_to_fp16_dtype_0"), val = string("fp16")];
bool var_1272_transpose_x_0 = const()[name = string("op_1272_transpose_x_0"), val = bool(false)];
bool var_1272_transpose_y_0 = const()[name = string("op_1272_transpose_y_0"), val = bool(false)];
string xa_k_2_to_fp16_dtype_0 = const()[name = string("xa_k_2_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_2_to_fp16 = cast(dtype = xa_k_2_to_fp16_dtype_0, x = xa_k_2)[name = string("cast_22")];
tensor<fp16, [1, 1, 128, 256]> transpose_107 = transpose(perm = transpose_107_perm_0, x = xa_k_2_to_fp16)[name = string("transpose_238")];
tensor<fp16, [1, 1, 1, 128]> transpose_106 = transpose(perm = transpose_106_perm_0, x = xq_proj_5_cast_fp16)[name = string("transpose_239")];
tensor<fp16, [1, 1, 1, 256]> var_1272_cast_fp16 = matmul(transpose_x = var_1272_transpose_x_0, transpose_y = var_1272_transpose_y_0, x = transpose_106, y = transpose_107)[name = string("op_1272_cast_fp16")];
fp16 var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_9_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = string("xscores_9_cast_fp16")];
fp16 var_1291_to_fp16 = const()[name = string("op_1291_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_11_cast_fp16 = select(a = var_1291_to_fp16, b = xscores_9_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_11_cast_fp16")];
int32 var_1293 = const()[name = string("op_1293"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_5_cast_fp16 = softmax(axis = var_1293, x = xscores_11_cast_fp16)[name = string("xprobs_5_cast_fp16")];
bool var_1296_transpose_x_0 = const()[name = string("op_1296_transpose_x_0"), val = bool(false)];
bool var_1296_transpose_y_0 = const()[name = string("op_1296_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_2_to_fp16 = cast(dtype = xa_v_2_to_fp16_dtype_0, x = xa_v_2)[name = string("cast_21")];
tensor<fp16, [1, 1, 256, 128]> xvT_5_cast_fp16 = transpose(perm = var_1270, x = xa_v_2_to_fp16)[name = string("transpose_237")];
tensor<fp16, [1, 1, 1, 128]> var_1296_cast_fp16 = matmul(transpose_x = var_1296_transpose_x_0, transpose_y = var_1296_transpose_y_0, x = xprobs_5_cast_fp16, y = xvT_5_cast_fp16)[name = string("op_1296_cast_fp16")];
tensor<int32, [4]> var_1301 = const()[name = string("op_1301"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1306 = const()[name = string("op_1306"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1302_cast_fp16 = transpose(perm = var_1301, x = var_1296_cast_fp16)[name = string("transpose_236")];
tensor<fp16, [1, 1, 128]> input_41_cast_fp16 = reshape(shape = var_1306, x = var_1302_cast_fp16)[name = string("input_41_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18643968))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18742336))))[name = string("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_11_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_43_cast_fp16 = add(x = input_37_cast_fp16, y = linear_11_cast_fp16)[name = string("input_43_cast_fp16")];
tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18743936)))];
fp16 var_1314_to_fp16 = const()[name = string("op_1314_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, epsilon = var_1314_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_43_cast_fp16)[name = string("x_17_cast_fp16")];
tensor<int32, [3]> var_1330 = const()[name = string("op_1330"), val = tensor<int32, [3]>([0, 2, 1])];
string y_9_pad_type_0 = const()[name = string("y_9_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_9_strides_0 = const()[name = string("y_9_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_9_pad_0 = const()[name = string("y_9_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_9_dilations_0 = const()[name = string("y_9_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_9_groups_0 = const()[name = string("y_9_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18745536))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21104896))))[name = string("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_19_cast_fp16 = transpose(perm = var_1330, x = x_17_cast_fp16)[name = string("transpose_235")];
tensor<fp16, [1, 3072, 1]> y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_19_cast_fp16)[name = string("y_9_cast_fp16")];
string x_21_mode_0 = const()[name = string("x_21_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_21_cast_fp16 = gelu(mode = x_21_mode_0, x = y_9_cast_fp16)[name = string("x_21_cast_fp16")];
string y_11_pad_type_0 = const()[name = string("y_11_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_11_strides_0 = const()[name = string("y_11_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_11_pad_0 = const()[name = string("y_11_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_11_dilations_0 = const()[name = string("y_11_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_11_groups_0 = const()[name = string("y_11_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21111104))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23470464))))[name = string("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = string("y_11_cast_fp16")];
tensor<int32, [3]> var_1348 = const()[name = string("op_1348"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_1349_cast_fp16 = transpose(perm = var_1348, x = y_11_cast_fp16)[name = string("transpose_234")];
tensor<fp16, [1, 1, 768]> input_45_cast_fp16 = add(x = input_43_cast_fp16, y = var_1349_cast_fp16)[name = string("input_45_cast_fp16")];
tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_3_norm_self_weight_to_fp16 = const()[name = string("dec_layers_3_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23472064)))];
fp16 var_1353_to_fp16 = const()[name = string("op_1353_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, epsilon = var_1353_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23473664))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25243200))))[name = string("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_12_cast_fp16")];
tensor<int32, [5]> var_1367 = const()[name = string("op_1367"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_15_cast_fp16 = reshape(shape = var_1367, x = linear_12_cast_fp16)[name = string("qkv_15_cast_fp16")];
tensor<int32, [5]> q_7_begin_0 = const()[name = string("q_7_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_7_end_0 = const()[name = string("q_7_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_7_end_mask_0 = const()[name = string("q_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_7_squeeze_mask_0 = const()[name = string("q_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_7_cast_fp16 = slice_by_index(begin = q_7_begin_0, end = q_7_end_0, end_mask = q_7_end_mask_0, squeeze_mask = q_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = string("q_7_cast_fp16")];
tensor<int32, [5]> new_k_7_begin_0 = const()[name = string("new_k_7_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_7_end_0 = const()[name = string("new_k_7_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_7_end_mask_0 = const()[name = string("new_k_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_7_squeeze_mask_0 = const()[name = string("new_k_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = string("new_k_7_cast_fp16")];
tensor<int32, [5]> new_v_7_begin_0 = const()[name = string("new_v_7_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_7_end_0 = const()[name = string("new_v_7_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_7_end_mask_0 = const()[name = string("new_v_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_7_squeeze_mask_0 = const()[name = string("new_v_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = string("new_v_7_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_6 = read_state(input = sa_k_3)[name = string("read_state_6")];
tensor<fp16, [1, 600, 12, 64]> var_1436_cast_fp16 = sub(x = new_k_7_cast_fp16, y = read_state_6)[name = string("op_1436_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1437_cast_fp16 = mul(x = var_1436_cast_fp16, y = write_oh_b_1)[name = string("op_1437_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_15_cast_fp16 = add(x = read_state_6, y = var_1437_cast_fp16)[name = string("sa_k_buf_15_cast_fp16")];
write_state(data = sa_k_buf_15_cast_fp16, input = sa_k_3)[name = string("coreml_update_state_30_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_30 = read_state(input = sa_k_3)[name = string("coreml_update_state_30")];
tensor<fp16, [1, 600, 12, 64]> read_state_7 = read_state(input = sa_v_3)[name = string("read_state_7")];
tensor<fp16, [1, 600, 12, 64]> var_1441_cast_fp16 = sub(x = new_v_7_cast_fp16, y = read_state_7)[name = string("op_1441_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1442_cast_fp16 = mul(x = var_1441_cast_fp16, y = write_oh_b_1)[name = string("op_1442_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_15_cast_fp16 = add(x = read_state_7, y = var_1442_cast_fp16)[name = string("sa_v_buf_15_cast_fp16")];
write_state(data = sa_v_buf_15_cast_fp16, input = sa_v_3)[name = string("coreml_update_state_31_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_31 = read_state(input = sa_v_3)[name = string("coreml_update_state_31")];
tensor<int32, [4]> var_1471 = const()[name = string("op_1471"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_1473_transpose_x_0 = const()[name = string("op_1473_transpose_x_0"), val = bool(false)];
bool var_1473_transpose_y_0 = const()[name = string("op_1473_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_109 = transpose(perm = transpose_109_perm_0, x = coreml_update_state_30)[name = string("transpose_232")];
tensor<fp16, [1, 12, 1, 64]> transpose_108 = transpose(perm = transpose_108_perm_0, x = q_7_cast_fp16)[name = string("transpose_233")];
tensor<fp16, [1, 12, 1, 600]> var_1473_cast_fp16 = matmul(transpose_x = var_1473_transpose_x_0, transpose_y = var_1473_transpose_y_0, x = transpose_108, y = transpose_109)[name = string("op_1473_cast_fp16")];
fp16 var_1474_to_fp16 = const()[name = string("op_1474_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_13_cast_fp16 = mul(x = var_1473_cast_fp16, y = var_1474_to_fp16)[name = string("scores_13_cast_fp16")];
fp16 var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_15_cast_fp16 = select(a = var_1492_to_fp16, b = scores_13_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_15_cast_fp16")];
int32 var_1494 = const()[name = string("op_1494"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_7_cast_fp16 = softmax(axis = var_1494, x = scores_15_cast_fp16)[name = string("probs_7_cast_fp16")];
bool var_1497_transpose_x_0 = const()[name = string("op_1497_transpose_x_0"), val = bool(false)];
bool var_1497_transpose_y_0 = const()[name = string("op_1497_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_7_cast_fp16 = transpose(perm = var_1471, x = coreml_update_state_31)[name = string("transpose_231")];
tensor<fp16, [1, 12, 1, 64]> var_1497_cast_fp16 = matmul(transpose_x = var_1497_transpose_x_0, transpose_y = var_1497_transpose_y_0, x = probs_7_cast_fp16, y = v_t_7_cast_fp16)[name = string("op_1497_cast_fp16")];
tensor<int32, [4]> var_1502 = const()[name = string("op_1502"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1507 = const()[name = string("op_1507"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1503_cast_fp16 = transpose(perm = var_1502, x = var_1497_cast_fp16)[name = string("transpose_230")];
tensor<fp16, [1, 1, 768]> input_49_cast_fp16 = reshape(shape = var_1507, x = var_1503_cast_fp16)[name = string("input_49_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25247872))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25837760))))[name = string("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_13_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_51_cast_fp16 = add(x = input_45_cast_fp16, y = linear_13_cast_fp16)[name = string("input_51_cast_fp16")];
tensor<int32, [1]> input_53_axes_0 = const()[name = string("input_53_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25839360)))];
fp16 var_1515_to_fp16 = const()[name = string("op_1515_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, epsilon = var_1515_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_51_cast_fp16)[name = string("input_53_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25840960))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25939328))))[name = string("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = input_53_cast_fp16)[name = string("linear_14_cast_fp16")];
tensor<int32, [4]> var_1528 = const()[name = string("op_1528"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_7_cast_fp16 = reshape(shape = var_1528, x = linear_14_cast_fp16)[name = string("xq_proj_7_cast_fp16")];
tensor<int32, [4]> var_1546 = const()[name = string("op_1546"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_3_to_fp16_dtype_0 = const()[name = string("xa_v_3_to_fp16_dtype_0"), val = string("fp16")];
bool var_1548_transpose_x_0 = const()[name = string("op_1548_transpose_x_0"), val = bool(false)];
bool var_1548_transpose_y_0 = const()[name = string("op_1548_transpose_y_0"), val = bool(false)];
string xa_k_3_to_fp16_dtype_0 = const()[name = string("xa_k_3_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_3_to_fp16 = cast(dtype = xa_k_3_to_fp16_dtype_0, x = xa_k_3)[name = string("cast_20")];
tensor<fp16, [1, 1, 128, 256]> transpose_111 = transpose(perm = transpose_111_perm_0, x = xa_k_3_to_fp16)[name = string("transpose_228")];
tensor<fp16, [1, 1, 1, 128]> transpose_110 = transpose(perm = transpose_110_perm_0, x = xq_proj_7_cast_fp16)[name = string("transpose_229")];
tensor<fp16, [1, 1, 1, 256]> var_1548_cast_fp16 = matmul(transpose_x = var_1548_transpose_x_0, transpose_y = var_1548_transpose_y_0, x = transpose_110, y = transpose_111)[name = string("op_1548_cast_fp16")];
fp16 var_1549_to_fp16 = const()[name = string("op_1549_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_13_cast_fp16 = mul(x = var_1548_cast_fp16, y = var_1549_to_fp16)[name = string("xscores_13_cast_fp16")];
fp16 var_1567_to_fp16 = const()[name = string("op_1567_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_15_cast_fp16 = select(a = var_1567_to_fp16, b = xscores_13_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_15_cast_fp16")];
int32 var_1569 = const()[name = string("op_1569"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_7_cast_fp16 = softmax(axis = var_1569, x = xscores_15_cast_fp16)[name = string("xprobs_7_cast_fp16")];
bool var_1572_transpose_x_0 = const()[name = string("op_1572_transpose_x_0"), val = bool(false)];
bool var_1572_transpose_y_0 = const()[name = string("op_1572_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_3_to_fp16 = cast(dtype = xa_v_3_to_fp16_dtype_0, x = xa_v_3)[name = string("cast_19")];
tensor<fp16, [1, 1, 256, 128]> xvT_7_cast_fp16 = transpose(perm = var_1546, x = xa_v_3_to_fp16)[name = string("transpose_227")];
tensor<fp16, [1, 1, 1, 128]> var_1572_cast_fp16 = matmul(transpose_x = var_1572_transpose_x_0, transpose_y = var_1572_transpose_y_0, x = xprobs_7_cast_fp16, y = xvT_7_cast_fp16)[name = string("op_1572_cast_fp16")];
tensor<int32, [4]> var_1577 = const()[name = string("op_1577"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1582 = const()[name = string("op_1582"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1578_cast_fp16 = transpose(perm = var_1577, x = var_1572_cast_fp16)[name = string("transpose_226")];
tensor<fp16, [1, 1, 128]> input_55_cast_fp16 = reshape(shape = var_1582, x = var_1578_cast_fp16)[name = string("input_55_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25939648))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26038016))))[name = string("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_15_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_57_cast_fp16 = add(x = input_51_cast_fp16, y = linear_15_cast_fp16)[name = string("input_57_cast_fp16")];
tensor<int32, [1]> x_25_axes_0 = const()[name = string("x_25_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26039616)))];
fp16 var_1590_to_fp16 = const()[name = string("op_1590_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_1590_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_57_cast_fp16)[name = string("x_25_cast_fp16")];
tensor<int32, [3]> var_1606 = const()[name = string("op_1606"), val = tensor<int32, [3]>([0, 2, 1])];
string y_13_pad_type_0 = const()[name = string("y_13_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_13_strides_0 = const()[name = string("y_13_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_13_pad_0 = const()[name = string("y_13_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_13_dilations_0 = const()[name = string("y_13_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_13_groups_0 = const()[name = string("y_13_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26041216))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28400576))))[name = string("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_27_cast_fp16 = transpose(perm = var_1606, x = x_25_cast_fp16)[name = string("transpose_225")];
tensor<fp16, [1, 3072, 1]> y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = string("y_13_cast_fp16")];
string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_13_cast_fp16)[name = string("x_29_cast_fp16")];
string y_15_pad_type_0 = const()[name = string("y_15_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_15_strides_0 = const()[name = string("y_15_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_15_pad_0 = const()[name = string("y_15_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_15_dilations_0 = const()[name = string("y_15_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_15_groups_0 = const()[name = string("y_15_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28406784))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30766144))))[name = string("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = string("y_15_cast_fp16")];
tensor<int32, [3]> var_1624 = const()[name = string("op_1624"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_1625_cast_fp16 = transpose(perm = var_1624, x = y_15_cast_fp16)[name = string("transpose_224")];
tensor<fp16, [1, 1, 768]> input_59_cast_fp16 = add(x = input_57_cast_fp16, y = var_1625_cast_fp16)[name = string("input_59_cast_fp16")];
tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_4_norm_self_weight_to_fp16 = const()[name = string("dec_layers_4_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30767744)))];
fp16 var_1629_to_fp16 = const()[name = string("op_1629_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, epsilon = var_1629_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30769344))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32538880))))[name = string("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = string("linear_16_cast_fp16")];
tensor<int32, [5]> var_1643 = const()[name = string("op_1643"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_19_cast_fp16 = reshape(shape = var_1643, x = linear_16_cast_fp16)[name = string("qkv_19_cast_fp16")];
tensor<int32, [5]> q_9_begin_0 = const()[name = string("q_9_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_9_end_0 = const()[name = string("q_9_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_9_end_mask_0 = const()[name = string("q_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_9_squeeze_mask_0 = const()[name = string("q_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = string("q_9_cast_fp16")];
tensor<int32, [5]> new_k_9_begin_0 = const()[name = string("new_k_9_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_9_end_0 = const()[name = string("new_k_9_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_9_end_mask_0 = const()[name = string("new_k_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_9_squeeze_mask_0 = const()[name = string("new_k_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = string("new_k_9_cast_fp16")];
tensor<int32, [5]> new_v_9_begin_0 = const()[name = string("new_v_9_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_9_end_0 = const()[name = string("new_v_9_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_9_end_mask_0 = const()[name = string("new_v_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_9_squeeze_mask_0 = const()[name = string("new_v_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = string("new_v_9_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_8 = read_state(input = sa_k_4)[name = string("read_state_8")];
tensor<fp16, [1, 600, 12, 64]> var_1712_cast_fp16 = sub(x = new_k_9_cast_fp16, y = read_state_8)[name = string("op_1712_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1713_cast_fp16 = mul(x = var_1712_cast_fp16, y = write_oh_b_1)[name = string("op_1713_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_19_cast_fp16 = add(x = read_state_8, y = var_1713_cast_fp16)[name = string("sa_k_buf_19_cast_fp16")];
write_state(data = sa_k_buf_19_cast_fp16, input = sa_k_4)[name = string("coreml_update_state_32_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_32 = read_state(input = sa_k_4)[name = string("coreml_update_state_32")];
tensor<fp16, [1, 600, 12, 64]> read_state_9 = read_state(input = sa_v_4)[name = string("read_state_9")];
tensor<fp16, [1, 600, 12, 64]> var_1717_cast_fp16 = sub(x = new_v_9_cast_fp16, y = read_state_9)[name = string("op_1717_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1718_cast_fp16 = mul(x = var_1717_cast_fp16, y = write_oh_b_1)[name = string("op_1718_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_19_cast_fp16 = add(x = read_state_9, y = var_1718_cast_fp16)[name = string("sa_v_buf_19_cast_fp16")];
write_state(data = sa_v_buf_19_cast_fp16, input = sa_v_4)[name = string("coreml_update_state_33_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_33 = read_state(input = sa_v_4)[name = string("coreml_update_state_33")];
tensor<int32, [4]> var_1747 = const()[name = string("op_1747"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_1749_transpose_x_0 = const()[name = string("op_1749_transpose_x_0"), val = bool(false)];
bool var_1749_transpose_y_0 = const()[name = string("op_1749_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_113 = transpose(perm = transpose_113_perm_0, x = coreml_update_state_32)[name = string("transpose_222")];
tensor<fp16, [1, 12, 1, 64]> transpose_112 = transpose(perm = transpose_112_perm_0, x = q_9_cast_fp16)[name = string("transpose_223")];
tensor<fp16, [1, 12, 1, 600]> var_1749_cast_fp16 = matmul(transpose_x = var_1749_transpose_x_0, transpose_y = var_1749_transpose_y_0, x = transpose_112, y = transpose_113)[name = string("op_1749_cast_fp16")];
fp16 var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_17_cast_fp16 = mul(x = var_1749_cast_fp16, y = var_1750_to_fp16)[name = string("scores_17_cast_fp16")];
fp16 var_1768_to_fp16 = const()[name = string("op_1768_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_19_cast_fp16 = select(a = var_1768_to_fp16, b = scores_17_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_19_cast_fp16")];
int32 var_1770 = const()[name = string("op_1770"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_9_cast_fp16 = softmax(axis = var_1770, x = scores_19_cast_fp16)[name = string("probs_9_cast_fp16")];
bool var_1773_transpose_x_0 = const()[name = string("op_1773_transpose_x_0"), val = bool(false)];
bool var_1773_transpose_y_0 = const()[name = string("op_1773_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_9_cast_fp16 = transpose(perm = var_1747, x = coreml_update_state_33)[name = string("transpose_221")];
tensor<fp16, [1, 12, 1, 64]> var_1773_cast_fp16 = matmul(transpose_x = var_1773_transpose_x_0, transpose_y = var_1773_transpose_y_0, x = probs_9_cast_fp16, y = v_t_9_cast_fp16)[name = string("op_1773_cast_fp16")];
tensor<int32, [4]> var_1778 = const()[name = string("op_1778"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1783 = const()[name = string("op_1783"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_1779_cast_fp16 = transpose(perm = var_1778, x = var_1773_cast_fp16)[name = string("transpose_220")];
tensor<fp16, [1, 1, 768]> input_63_cast_fp16 = reshape(shape = var_1783, x = var_1779_cast_fp16)[name = string("input_63_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32543552))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33133440))))[name = string("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_17_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_65_cast_fp16 = add(x = input_59_cast_fp16, y = linear_17_cast_fp16)[name = string("input_65_cast_fp16")];
tensor<int32, [1]> input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33135040)))];
fp16 var_1791_to_fp16 = const()[name = string("op_1791_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, epsilon = var_1791_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_65_cast_fp16)[name = string("input_67_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33136640))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33235008))))[name = string("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_18_cast_fp16")];
tensor<int32, [4]> var_1804 = const()[name = string("op_1804"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_9_cast_fp16 = reshape(shape = var_1804, x = linear_18_cast_fp16)[name = string("xq_proj_9_cast_fp16")];
tensor<int32, [4]> var_1822 = const()[name = string("op_1822"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_4_to_fp16_dtype_0 = const()[name = string("xa_v_4_to_fp16_dtype_0"), val = string("fp16")];
bool var_1824_transpose_x_0 = const()[name = string("op_1824_transpose_x_0"), val = bool(false)];
bool var_1824_transpose_y_0 = const()[name = string("op_1824_transpose_y_0"), val = bool(false)];
string xa_k_4_to_fp16_dtype_0 = const()[name = string("xa_k_4_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_4_to_fp16 = cast(dtype = xa_k_4_to_fp16_dtype_0, x = xa_k_4)[name = string("cast_18")];
tensor<fp16, [1, 1, 128, 256]> transpose_115 = transpose(perm = transpose_115_perm_0, x = xa_k_4_to_fp16)[name = string("transpose_218")];
tensor<fp16, [1, 1, 1, 128]> transpose_114 = transpose(perm = transpose_114_perm_0, x = xq_proj_9_cast_fp16)[name = string("transpose_219")];
tensor<fp16, [1, 1, 1, 256]> var_1824_cast_fp16 = matmul(transpose_x = var_1824_transpose_x_0, transpose_y = var_1824_transpose_y_0, x = transpose_114, y = transpose_115)[name = string("op_1824_cast_fp16")];
fp16 var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_17_cast_fp16 = mul(x = var_1824_cast_fp16, y = var_1825_to_fp16)[name = string("xscores_17_cast_fp16")];
fp16 var_1843_to_fp16 = const()[name = string("op_1843_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_19_cast_fp16 = select(a = var_1843_to_fp16, b = xscores_17_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_19_cast_fp16")];
int32 var_1845 = const()[name = string("op_1845"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_9_cast_fp16 = softmax(axis = var_1845, x = xscores_19_cast_fp16)[name = string("xprobs_9_cast_fp16")];
bool var_1848_transpose_x_0 = const()[name = string("op_1848_transpose_x_0"), val = bool(false)];
bool var_1848_transpose_y_0 = const()[name = string("op_1848_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_4_to_fp16 = cast(dtype = xa_v_4_to_fp16_dtype_0, x = xa_v_4)[name = string("cast_17")];
tensor<fp16, [1, 1, 256, 128]> xvT_9_cast_fp16 = transpose(perm = var_1822, x = xa_v_4_to_fp16)[name = string("transpose_217")];
tensor<fp16, [1, 1, 1, 128]> var_1848_cast_fp16 = matmul(transpose_x = var_1848_transpose_x_0, transpose_y = var_1848_transpose_y_0, x = xprobs_9_cast_fp16, y = xvT_9_cast_fp16)[name = string("op_1848_cast_fp16")];
tensor<int32, [4]> var_1853 = const()[name = string("op_1853"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1858 = const()[name = string("op_1858"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_1854_cast_fp16 = transpose(perm = var_1853, x = var_1848_cast_fp16)[name = string("transpose_216")];
tensor<fp16, [1, 1, 128]> input_69_cast_fp16 = reshape(shape = var_1858, x = var_1854_cast_fp16)[name = string("input_69_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33235328))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33333696))))[name = string("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = string("linear_19_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_71_cast_fp16 = add(x = input_65_cast_fp16, y = linear_19_cast_fp16)[name = string("input_71_cast_fp16")];
tensor<int32, [1]> x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33335296)))];
fp16 var_1866_to_fp16 = const()[name = string("op_1866_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_1866_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_71_cast_fp16)[name = string("x_33_cast_fp16")];
tensor<int32, [3]> var_1882 = const()[name = string("op_1882"), val = tensor<int32, [3]>([0, 2, 1])];
string y_17_pad_type_0 = const()[name = string("y_17_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_17_strides_0 = const()[name = string("y_17_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_17_pad_0 = const()[name = string("y_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_17_dilations_0 = const()[name = string("y_17_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_17_groups_0 = const()[name = string("y_17_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33336896))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35696256))))[name = string("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_35_cast_fp16 = transpose(perm = var_1882, x = x_33_cast_fp16)[name = string("transpose_215")];
tensor<fp16, [1, 3072, 1]> y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_35_cast_fp16)[name = string("y_17_cast_fp16")];
string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = y_17_cast_fp16)[name = string("x_37_cast_fp16")];
string y_19_pad_type_0 = const()[name = string("y_19_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_19_strides_0 = const()[name = string("y_19_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_19_pad_0 = const()[name = string("y_19_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_19_dilations_0 = const()[name = string("y_19_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_19_groups_0 = const()[name = string("y_19_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35702464))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38061824))))[name = string("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_19_cast_fp16 = conv(dilations = y_19_dilations_0, groups = y_19_groups_0, pad = y_19_pad_0, pad_type = y_19_pad_type_0, strides = y_19_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = string("y_19_cast_fp16")];
tensor<int32, [3]> var_1900 = const()[name = string("op_1900"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_1901_cast_fp16 = transpose(perm = var_1900, x = y_19_cast_fp16)[name = string("transpose_214")];
tensor<fp16, [1, 1, 768]> input_73_cast_fp16 = add(x = input_71_cast_fp16, y = var_1901_cast_fp16)[name = string("input_73_cast_fp16")];
tensor<int32, [1]> input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_5_norm_self_weight_to_fp16 = const()[name = string("dec_layers_5_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38063424)))];
fp16 var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_75_cast_fp16 = layer_norm(axes = input_75_axes_0, epsilon = var_1905_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = input_73_cast_fp16)[name = string("input_75_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38065024))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39834560))))[name = string("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_20_cast_fp16")];
tensor<int32, [5]> var_1919 = const()[name = string("op_1919"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_23_cast_fp16 = reshape(shape = var_1919, x = linear_20_cast_fp16)[name = string("qkv_23_cast_fp16")];
tensor<int32, [5]> q_11_begin_0 = const()[name = string("q_11_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_11_end_0 = const()[name = string("q_11_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_11_end_mask_0 = const()[name = string("q_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_11_squeeze_mask_0 = const()[name = string("q_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_11_cast_fp16 = slice_by_index(begin = q_11_begin_0, end = q_11_end_0, end_mask = q_11_end_mask_0, squeeze_mask = q_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = string("q_11_cast_fp16")];
tensor<int32, [5]> new_k_11_begin_0 = const()[name = string("new_k_11_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_11_end_0 = const()[name = string("new_k_11_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_11_end_mask_0 = const()[name = string("new_k_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_11_squeeze_mask_0 = const()[name = string("new_k_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = string("new_k_11_cast_fp16")];
tensor<int32, [5]> new_v_11_begin_0 = const()[name = string("new_v_11_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_11_end_0 = const()[name = string("new_v_11_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_11_end_mask_0 = const()[name = string("new_v_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_11_squeeze_mask_0 = const()[name = string("new_v_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = string("new_v_11_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_10 = read_state(input = sa_k_5)[name = string("read_state_10")];
tensor<fp16, [1, 600, 12, 64]> var_1988_cast_fp16 = sub(x = new_k_11_cast_fp16, y = read_state_10)[name = string("op_1988_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1989_cast_fp16 = mul(x = var_1988_cast_fp16, y = write_oh_b_1)[name = string("op_1989_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_23_cast_fp16 = add(x = read_state_10, y = var_1989_cast_fp16)[name = string("sa_k_buf_23_cast_fp16")];
write_state(data = sa_k_buf_23_cast_fp16, input = sa_k_5)[name = string("coreml_update_state_34_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_34 = read_state(input = sa_k_5)[name = string("coreml_update_state_34")];
tensor<fp16, [1, 600, 12, 64]> read_state_11 = read_state(input = sa_v_5)[name = string("read_state_11")];
tensor<fp16, [1, 600, 12, 64]> var_1993_cast_fp16 = sub(x = new_v_11_cast_fp16, y = read_state_11)[name = string("op_1993_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_1994_cast_fp16 = mul(x = var_1993_cast_fp16, y = write_oh_b_1)[name = string("op_1994_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_23_cast_fp16 = add(x = read_state_11, y = var_1994_cast_fp16)[name = string("sa_v_buf_23_cast_fp16")];
write_state(data = sa_v_buf_23_cast_fp16, input = sa_v_5)[name = string("coreml_update_state_35_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_35 = read_state(input = sa_v_5)[name = string("coreml_update_state_35")];
tensor<int32, [4]> var_2023 = const()[name = string("op_2023"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_2025_transpose_x_0 = const()[name = string("op_2025_transpose_x_0"), val = bool(false)];
bool var_2025_transpose_y_0 = const()[name = string("op_2025_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_117 = transpose(perm = transpose_117_perm_0, x = coreml_update_state_34)[name = string("transpose_212")];
tensor<fp16, [1, 12, 1, 64]> transpose_116 = transpose(perm = transpose_116_perm_0, x = q_11_cast_fp16)[name = string("transpose_213")];
tensor<fp16, [1, 12, 1, 600]> var_2025_cast_fp16 = matmul(transpose_x = var_2025_transpose_x_0, transpose_y = var_2025_transpose_y_0, x = transpose_116, y = transpose_117)[name = string("op_2025_cast_fp16")];
fp16 var_2026_to_fp16 = const()[name = string("op_2026_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_21_cast_fp16 = mul(x = var_2025_cast_fp16, y = var_2026_to_fp16)[name = string("scores_21_cast_fp16")];
fp16 var_2044_to_fp16 = const()[name = string("op_2044_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_23_cast_fp16 = select(a = var_2044_to_fp16, b = scores_21_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_23_cast_fp16")];
int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_11_cast_fp16 = softmax(axis = var_2046, x = scores_23_cast_fp16)[name = string("probs_11_cast_fp16")];
bool var_2049_transpose_x_0 = const()[name = string("op_2049_transpose_x_0"), val = bool(false)];
bool var_2049_transpose_y_0 = const()[name = string("op_2049_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_11_cast_fp16 = transpose(perm = var_2023, x = coreml_update_state_35)[name = string("transpose_211")];
tensor<fp16, [1, 12, 1, 64]> var_2049_cast_fp16 = matmul(transpose_x = var_2049_transpose_x_0, transpose_y = var_2049_transpose_y_0, x = probs_11_cast_fp16, y = v_t_11_cast_fp16)[name = string("op_2049_cast_fp16")];
tensor<int32, [4]> var_2054 = const()[name = string("op_2054"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2059 = const()[name = string("op_2059"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_2055_cast_fp16 = transpose(perm = var_2054, x = var_2049_cast_fp16)[name = string("transpose_210")];
tensor<fp16, [1, 1, 768]> input_77_cast_fp16 = reshape(shape = var_2059, x = var_2055_cast_fp16)[name = string("input_77_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39839232))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40429120))))[name = string("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_77_cast_fp16)[name = string("linear_21_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_79_cast_fp16 = add(x = input_73_cast_fp16, y = linear_21_cast_fp16)[name = string("input_79_cast_fp16")];
tensor<int32, [1]> input_81_axes_0 = const()[name = string("input_81_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40430720)))];
fp16 var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_2067_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_79_cast_fp16)[name = string("input_81_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40432320))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40530688))))[name = string("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_22_cast_fp16")];
tensor<int32, [4]> var_2080 = const()[name = string("op_2080"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_11_cast_fp16 = reshape(shape = var_2080, x = linear_22_cast_fp16)[name = string("xq_proj_11_cast_fp16")];
tensor<int32, [4]> var_2098 = const()[name = string("op_2098"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_5_to_fp16_dtype_0 = const()[name = string("xa_v_5_to_fp16_dtype_0"), val = string("fp16")];
bool var_2100_transpose_x_0 = const()[name = string("op_2100_transpose_x_0"), val = bool(false)];
bool var_2100_transpose_y_0 = const()[name = string("op_2100_transpose_y_0"), val = bool(false)];
string xa_k_5_to_fp16_dtype_0 = const()[name = string("xa_k_5_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_5_to_fp16 = cast(dtype = xa_k_5_to_fp16_dtype_0, x = xa_k_5)[name = string("cast_16")];
tensor<fp16, [1, 1, 128, 256]> transpose_119 = transpose(perm = transpose_119_perm_0, x = xa_k_5_to_fp16)[name = string("transpose_208")];
tensor<fp16, [1, 1, 1, 128]> transpose_118 = transpose(perm = transpose_118_perm_0, x = xq_proj_11_cast_fp16)[name = string("transpose_209")];
tensor<fp16, [1, 1, 1, 256]> var_2100_cast_fp16 = matmul(transpose_x = var_2100_transpose_x_0, transpose_y = var_2100_transpose_y_0, x = transpose_118, y = transpose_119)[name = string("op_2100_cast_fp16")];
fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_21_cast_fp16 = mul(x = var_2100_cast_fp16, y = var_2101_to_fp16)[name = string("xscores_21_cast_fp16")];
fp16 var_2119_to_fp16 = const()[name = string("op_2119_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_23_cast_fp16 = select(a = var_2119_to_fp16, b = xscores_21_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_23_cast_fp16")];
int32 var_2121 = const()[name = string("op_2121"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_11_cast_fp16 = softmax(axis = var_2121, x = xscores_23_cast_fp16)[name = string("xprobs_11_cast_fp16")];
bool var_2124_transpose_x_0 = const()[name = string("op_2124_transpose_x_0"), val = bool(false)];
bool var_2124_transpose_y_0 = const()[name = string("op_2124_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_5_to_fp16 = cast(dtype = xa_v_5_to_fp16_dtype_0, x = xa_v_5)[name = string("cast_15")];
tensor<fp16, [1, 1, 256, 128]> xvT_11_cast_fp16 = transpose(perm = var_2098, x = xa_v_5_to_fp16)[name = string("transpose_207")];
tensor<fp16, [1, 1, 1, 128]> var_2124_cast_fp16 = matmul(transpose_x = var_2124_transpose_x_0, transpose_y = var_2124_transpose_y_0, x = xprobs_11_cast_fp16, y = xvT_11_cast_fp16)[name = string("op_2124_cast_fp16")];
tensor<int32, [4]> var_2129 = const()[name = string("op_2129"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2134 = const()[name = string("op_2134"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_2130_cast_fp16 = transpose(perm = var_2129, x = var_2124_cast_fp16)[name = string("transpose_206")];
tensor<fp16, [1, 1, 128]> input_83_cast_fp16 = reshape(shape = var_2134, x = var_2130_cast_fp16)[name = string("input_83_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40531008))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40629376))))[name = string("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_23_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_85_cast_fp16 = add(x = input_79_cast_fp16, y = linear_23_cast_fp16)[name = string("input_85_cast_fp16")];
tensor<int32, [1]> x_41_axes_0 = const()[name = string("x_41_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40630976)))];
fp16 var_2142_to_fp16 = const()[name = string("op_2142_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_2142_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_85_cast_fp16)[name = string("x_41_cast_fp16")];
tensor<int32, [3]> var_2158 = const()[name = string("op_2158"), val = tensor<int32, [3]>([0, 2, 1])];
string y_21_pad_type_0 = const()[name = string("y_21_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_21_strides_0 = const()[name = string("y_21_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_21_pad_0 = const()[name = string("y_21_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_21_dilations_0 = const()[name = string("y_21_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_21_groups_0 = const()[name = string("y_21_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40632576))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42991936))))[name = string("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_43_cast_fp16 = transpose(perm = var_2158, x = x_41_cast_fp16)[name = string("transpose_205")];
tensor<fp16, [1, 3072, 1]> y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = string("y_21_cast_fp16")];
string x_45_mode_0 = const()[name = string("x_45_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_21_cast_fp16)[name = string("x_45_cast_fp16")];
string y_23_pad_type_0 = const()[name = string("y_23_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_23_strides_0 = const()[name = string("y_23_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_23_pad_0 = const()[name = string("y_23_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_23_dilations_0 = const()[name = string("y_23_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_23_groups_0 = const()[name = string("y_23_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42998144))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45357504))))[name = string("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = string("y_23_cast_fp16")];
tensor<int32, [3]> var_2176 = const()[name = string("op_2176"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_2177_cast_fp16 = transpose(perm = var_2176, x = y_23_cast_fp16)[name = string("transpose_204")];
tensor<fp16, [1, 1, 768]> input_87_cast_fp16 = add(x = input_85_cast_fp16, y = var_2177_cast_fp16)[name = string("input_87_cast_fp16")];
tensor<int32, [1]> input_89_axes_0 = const()[name = string("input_89_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_6_norm_self_weight_to_fp16 = const()[name = string("dec_layers_6_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45359104)))];
fp16 var_2181_to_fp16 = const()[name = string("op_2181_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_89_cast_fp16 = layer_norm(axes = input_89_axes_0, epsilon = var_2181_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45360704))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47130240))))[name = string("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_24_cast_fp16")];
tensor<int32, [5]> var_2195 = const()[name = string("op_2195"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_27_cast_fp16 = reshape(shape = var_2195, x = linear_24_cast_fp16)[name = string("qkv_27_cast_fp16")];
tensor<int32, [5]> q_13_begin_0 = const()[name = string("q_13_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_13_end_0 = const()[name = string("q_13_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_13_end_mask_0 = const()[name = string("q_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_13_squeeze_mask_0 = const()[name = string("q_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = string("q_13_cast_fp16")];
tensor<int32, [5]> new_k_13_begin_0 = const()[name = string("new_k_13_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_13_end_0 = const()[name = string("new_k_13_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_13_end_mask_0 = const()[name = string("new_k_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_13_squeeze_mask_0 = const()[name = string("new_k_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = string("new_k_13_cast_fp16")];
tensor<int32, [5]> new_v_13_begin_0 = const()[name = string("new_v_13_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_13_end_0 = const()[name = string("new_v_13_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_13_end_mask_0 = const()[name = string("new_v_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_13_squeeze_mask_0 = const()[name = string("new_v_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = string("new_v_13_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_12 = read_state(input = sa_k_6)[name = string("read_state_12")];
tensor<fp16, [1, 600, 12, 64]> var_2264_cast_fp16 = sub(x = new_k_13_cast_fp16, y = read_state_12)[name = string("op_2264_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_2265_cast_fp16 = mul(x = var_2264_cast_fp16, y = write_oh_b_1)[name = string("op_2265_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_27_cast_fp16 = add(x = read_state_12, y = var_2265_cast_fp16)[name = string("sa_k_buf_27_cast_fp16")];
write_state(data = sa_k_buf_27_cast_fp16, input = sa_k_6)[name = string("coreml_update_state_36_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_36 = read_state(input = sa_k_6)[name = string("coreml_update_state_36")];
tensor<fp16, [1, 600, 12, 64]> read_state_13 = read_state(input = sa_v_6)[name = string("read_state_13")];
tensor<fp16, [1, 600, 12, 64]> var_2269_cast_fp16 = sub(x = new_v_13_cast_fp16, y = read_state_13)[name = string("op_2269_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_2270_cast_fp16 = mul(x = var_2269_cast_fp16, y = write_oh_b_1)[name = string("op_2270_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_27_cast_fp16 = add(x = read_state_13, y = var_2270_cast_fp16)[name = string("sa_v_buf_27_cast_fp16")];
write_state(data = sa_v_buf_27_cast_fp16, input = sa_v_6)[name = string("coreml_update_state_37_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_37 = read_state(input = sa_v_6)[name = string("coreml_update_state_37")];
tensor<int32, [4]> var_2299 = const()[name = string("op_2299"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_2301_transpose_x_0 = const()[name = string("op_2301_transpose_x_0"), val = bool(false)];
bool var_2301_transpose_y_0 = const()[name = string("op_2301_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_121 = transpose(perm = transpose_121_perm_0, x = coreml_update_state_36)[name = string("transpose_202")];
tensor<fp16, [1, 12, 1, 64]> transpose_120 = transpose(perm = transpose_120_perm_0, x = q_13_cast_fp16)[name = string("transpose_203")];
tensor<fp16, [1, 12, 1, 600]> var_2301_cast_fp16 = matmul(transpose_x = var_2301_transpose_x_0, transpose_y = var_2301_transpose_y_0, x = transpose_120, y = transpose_121)[name = string("op_2301_cast_fp16")];
fp16 var_2302_to_fp16 = const()[name = string("op_2302_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_25_cast_fp16 = mul(x = var_2301_cast_fp16, y = var_2302_to_fp16)[name = string("scores_25_cast_fp16")];
fp16 var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_27_cast_fp16 = select(a = var_2320_to_fp16, b = scores_25_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_27_cast_fp16")];
int32 var_2322 = const()[name = string("op_2322"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_13_cast_fp16 = softmax(axis = var_2322, x = scores_27_cast_fp16)[name = string("probs_13_cast_fp16")];
bool var_2325_transpose_x_0 = const()[name = string("op_2325_transpose_x_0"), val = bool(false)];
bool var_2325_transpose_y_0 = const()[name = string("op_2325_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_13_cast_fp16 = transpose(perm = var_2299, x = coreml_update_state_37)[name = string("transpose_201")];
tensor<fp16, [1, 12, 1, 64]> var_2325_cast_fp16 = matmul(transpose_x = var_2325_transpose_x_0, transpose_y = var_2325_transpose_y_0, x = probs_13_cast_fp16, y = v_t_13_cast_fp16)[name = string("op_2325_cast_fp16")];
tensor<int32, [4]> var_2330 = const()[name = string("op_2330"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2335 = const()[name = string("op_2335"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_2331_cast_fp16 = transpose(perm = var_2330, x = var_2325_cast_fp16)[name = string("transpose_200")];
tensor<fp16, [1, 1, 768]> input_91_cast_fp16 = reshape(shape = var_2335, x = var_2331_cast_fp16)[name = string("input_91_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47134912))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47724800))))[name = string("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_25_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_93_cast_fp16 = add(x = input_87_cast_fp16, y = linear_25_cast_fp16)[name = string("input_93_cast_fp16")];
tensor<int32, [1]> input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47726400)))];
fp16 var_2343_to_fp16 = const()[name = string("op_2343_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, epsilon = var_2343_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47728000))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47826368))))[name = string("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_26_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_26_cast_fp16")];
tensor<int32, [4]> var_2356 = const()[name = string("op_2356"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_13_cast_fp16 = reshape(shape = var_2356, x = linear_26_cast_fp16)[name = string("xq_proj_13_cast_fp16")];
tensor<int32, [4]> var_2374 = const()[name = string("op_2374"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_6_to_fp16_dtype_0 = const()[name = string("xa_v_6_to_fp16_dtype_0"), val = string("fp16")];
bool var_2376_transpose_x_0 = const()[name = string("op_2376_transpose_x_0"), val = bool(false)];
bool var_2376_transpose_y_0 = const()[name = string("op_2376_transpose_y_0"), val = bool(false)];
string xa_k_6_to_fp16_dtype_0 = const()[name = string("xa_k_6_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_6_to_fp16 = cast(dtype = xa_k_6_to_fp16_dtype_0, x = xa_k_6)[name = string("cast_14")];
tensor<fp16, [1, 1, 128, 256]> transpose_123 = transpose(perm = transpose_123_perm_0, x = xa_k_6_to_fp16)[name = string("transpose_198")];
tensor<fp16, [1, 1, 1, 128]> transpose_122 = transpose(perm = transpose_122_perm_0, x = xq_proj_13_cast_fp16)[name = string("transpose_199")];
tensor<fp16, [1, 1, 1, 256]> var_2376_cast_fp16 = matmul(transpose_x = var_2376_transpose_x_0, transpose_y = var_2376_transpose_y_0, x = transpose_122, y = transpose_123)[name = string("op_2376_cast_fp16")];
fp16 var_2377_to_fp16 = const()[name = string("op_2377_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_25_cast_fp16 = mul(x = var_2376_cast_fp16, y = var_2377_to_fp16)[name = string("xscores_25_cast_fp16")];
fp16 var_2395_to_fp16 = const()[name = string("op_2395_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_27_cast_fp16 = select(a = var_2395_to_fp16, b = xscores_25_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_27_cast_fp16")];
int32 var_2397 = const()[name = string("op_2397"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_13_cast_fp16 = softmax(axis = var_2397, x = xscores_27_cast_fp16)[name = string("xprobs_13_cast_fp16")];
bool var_2400_transpose_x_0 = const()[name = string("op_2400_transpose_x_0"), val = bool(false)];
bool var_2400_transpose_y_0 = const()[name = string("op_2400_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_6_to_fp16 = cast(dtype = xa_v_6_to_fp16_dtype_0, x = xa_v_6)[name = string("cast_13")];
tensor<fp16, [1, 1, 256, 128]> xvT_13_cast_fp16 = transpose(perm = var_2374, x = xa_v_6_to_fp16)[name = string("transpose_197")];
tensor<fp16, [1, 1, 1, 128]> var_2400_cast_fp16 = matmul(transpose_x = var_2400_transpose_x_0, transpose_y = var_2400_transpose_y_0, x = xprobs_13_cast_fp16, y = xvT_13_cast_fp16)[name = string("op_2400_cast_fp16")];
tensor<int32, [4]> var_2405 = const()[name = string("op_2405"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2410 = const()[name = string("op_2410"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_2406_cast_fp16 = transpose(perm = var_2405, x = var_2400_cast_fp16)[name = string("transpose_196")];
tensor<fp16, [1, 1, 128]> input_97_cast_fp16 = reshape(shape = var_2410, x = var_2406_cast_fp16)[name = string("input_97_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47826688))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47925056))))[name = string("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_27_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_99_cast_fp16 = add(x = input_93_cast_fp16, y = linear_27_cast_fp16)[name = string("input_99_cast_fp16")];
tensor<int32, [1]> x_49_axes_0 = const()[name = string("x_49_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47926656)))];
fp16 var_2418_to_fp16 = const()[name = string("op_2418_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_49_cast_fp16 = layer_norm(axes = x_49_axes_0, epsilon = var_2418_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_99_cast_fp16)[name = string("x_49_cast_fp16")];
tensor<int32, [3]> var_2434 = const()[name = string("op_2434"), val = tensor<int32, [3]>([0, 2, 1])];
string y_25_pad_type_0 = const()[name = string("y_25_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_25_strides_0 = const()[name = string("y_25_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_25_pad_0 = const()[name = string("y_25_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_25_dilations_0 = const()[name = string("y_25_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_25_groups_0 = const()[name = string("y_25_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47928256))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50287616))))[name = string("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_51_cast_fp16 = transpose(perm = var_2434, x = x_49_cast_fp16)[name = string("transpose_195")];
tensor<fp16, [1, 3072, 1]> y_25_cast_fp16 = conv(dilations = y_25_dilations_0, groups = y_25_groups_0, pad = y_25_pad_0, pad_type = y_25_pad_type_0, strides = y_25_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_51_cast_fp16)[name = string("y_25_cast_fp16")];
string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = y_25_cast_fp16)[name = string("x_53_cast_fp16")];
string y_27_pad_type_0 = const()[name = string("y_27_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_27_strides_0 = const()[name = string("y_27_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_27_pad_0 = const()[name = string("y_27_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_27_dilations_0 = const()[name = string("y_27_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_27_groups_0 = const()[name = string("y_27_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50293824))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52653184))))[name = string("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = string("y_27_cast_fp16")];
tensor<int32, [3]> var_2452 = const()[name = string("op_2452"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_2453_cast_fp16 = transpose(perm = var_2452, x = y_27_cast_fp16)[name = string("transpose_194")];
tensor<fp16, [1, 1, 768]> input_101_cast_fp16 = add(x = input_99_cast_fp16, y = var_2453_cast_fp16)[name = string("input_101_cast_fp16")];
tensor<int32, [1]> input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_7_norm_self_weight_to_fp16 = const()[name = string("dec_layers_7_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52654784)))];
fp16 var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, epsilon = var_2457_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = input_101_cast_fp16)[name = string("input_103_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52656384))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54425920))))[name = string("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_28_cast_fp16")];
tensor<int32, [5]> var_2471 = const()[name = string("op_2471"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_31_cast_fp16 = reshape(shape = var_2471, x = linear_28_cast_fp16)[name = string("qkv_31_cast_fp16")];
tensor<int32, [5]> q_15_begin_0 = const()[name = string("q_15_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_15_end_0 = const()[name = string("q_15_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_15_end_mask_0 = const()[name = string("q_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_15_squeeze_mask_0 = const()[name = string("q_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_15_cast_fp16 = slice_by_index(begin = q_15_begin_0, end = q_15_end_0, end_mask = q_15_end_mask_0, squeeze_mask = q_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = string("q_15_cast_fp16")];
tensor<int32, [5]> new_k_15_begin_0 = const()[name = string("new_k_15_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_15_end_0 = const()[name = string("new_k_15_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_15_end_mask_0 = const()[name = string("new_k_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_15_squeeze_mask_0 = const()[name = string("new_k_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = string("new_k_15_cast_fp16")];
tensor<int32, [5]> new_v_15_begin_0 = const()[name = string("new_v_15_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_15_end_0 = const()[name = string("new_v_15_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_15_end_mask_0 = const()[name = string("new_v_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_15_squeeze_mask_0 = const()[name = string("new_v_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = string("new_v_15_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_14 = read_state(input = sa_k_7)[name = string("read_state_14")];
tensor<fp16, [1, 600, 12, 64]> var_2540_cast_fp16 = sub(x = new_k_15_cast_fp16, y = read_state_14)[name = string("op_2540_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_2541_cast_fp16 = mul(x = var_2540_cast_fp16, y = write_oh_b_1)[name = string("op_2541_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_31_cast_fp16 = add(x = read_state_14, y = var_2541_cast_fp16)[name = string("sa_k_buf_31_cast_fp16")];
write_state(data = sa_k_buf_31_cast_fp16, input = sa_k_7)[name = string("coreml_update_state_38_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_38 = read_state(input = sa_k_7)[name = string("coreml_update_state_38")];
tensor<fp16, [1, 600, 12, 64]> read_state_15 = read_state(input = sa_v_7)[name = string("read_state_15")];
tensor<fp16, [1, 600, 12, 64]> var_2545_cast_fp16 = sub(x = new_v_15_cast_fp16, y = read_state_15)[name = string("op_2545_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_2546_cast_fp16 = mul(x = var_2545_cast_fp16, y = write_oh_b_1)[name = string("op_2546_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_31_cast_fp16 = add(x = read_state_15, y = var_2546_cast_fp16)[name = string("sa_v_buf_31_cast_fp16")];
write_state(data = sa_v_buf_31_cast_fp16, input = sa_v_7)[name = string("coreml_update_state_39_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_39 = read_state(input = sa_v_7)[name = string("coreml_update_state_39")];
tensor<int32, [4]> var_2575 = const()[name = string("op_2575"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_2577_transpose_x_0 = const()[name = string("op_2577_transpose_x_0"), val = bool(false)];
bool var_2577_transpose_y_0 = const()[name = string("op_2577_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_125 = transpose(perm = transpose_125_perm_0, x = coreml_update_state_38)[name = string("transpose_192")];
tensor<fp16, [1, 12, 1, 64]> transpose_124 = transpose(perm = transpose_124_perm_0, x = q_15_cast_fp16)[name = string("transpose_193")];
tensor<fp16, [1, 12, 1, 600]> var_2577_cast_fp16 = matmul(transpose_x = var_2577_transpose_x_0, transpose_y = var_2577_transpose_y_0, x = transpose_124, y = transpose_125)[name = string("op_2577_cast_fp16")];
fp16 var_2578_to_fp16 = const()[name = string("op_2578_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_29_cast_fp16 = mul(x = var_2577_cast_fp16, y = var_2578_to_fp16)[name = string("scores_29_cast_fp16")];
fp16 var_2596_to_fp16 = const()[name = string("op_2596_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_31_cast_fp16 = select(a = var_2596_to_fp16, b = scores_29_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_31_cast_fp16")];
int32 var_2598 = const()[name = string("op_2598"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_15_cast_fp16 = softmax(axis = var_2598, x = scores_31_cast_fp16)[name = string("probs_15_cast_fp16")];
bool var_2601_transpose_x_0 = const()[name = string("op_2601_transpose_x_0"), val = bool(false)];
bool var_2601_transpose_y_0 = const()[name = string("op_2601_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_15_cast_fp16 = transpose(perm = var_2575, x = coreml_update_state_39)[name = string("transpose_191")];
tensor<fp16, [1, 12, 1, 64]> var_2601_cast_fp16 = matmul(transpose_x = var_2601_transpose_x_0, transpose_y = var_2601_transpose_y_0, x = probs_15_cast_fp16, y = v_t_15_cast_fp16)[name = string("op_2601_cast_fp16")];
tensor<int32, [4]> var_2606 = const()[name = string("op_2606"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2611 = const()[name = string("op_2611"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_2607_cast_fp16 = transpose(perm = var_2606, x = var_2601_cast_fp16)[name = string("transpose_190")];
tensor<fp16, [1, 1, 768]> input_105_cast_fp16 = reshape(shape = var_2611, x = var_2607_cast_fp16)[name = string("input_105_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54430592))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55020480))))[name = string("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_29_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_107_cast_fp16 = add(x = input_101_cast_fp16, y = linear_29_cast_fp16)[name = string("input_107_cast_fp16")];
tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55022080)))];
fp16 var_2619_to_fp16 = const()[name = string("op_2619_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, epsilon = var_2619_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55023680))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55122048))))[name = string("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = input_109_cast_fp16)[name = string("linear_30_cast_fp16")];
tensor<int32, [4]> var_2632 = const()[name = string("op_2632"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_15_cast_fp16 = reshape(shape = var_2632, x = linear_30_cast_fp16)[name = string("xq_proj_15_cast_fp16")];
tensor<int32, [4]> var_2650 = const()[name = string("op_2650"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_7_to_fp16_dtype_0 = const()[name = string("xa_v_7_to_fp16_dtype_0"), val = string("fp16")];
bool var_2652_transpose_x_0 = const()[name = string("op_2652_transpose_x_0"), val = bool(false)];
bool var_2652_transpose_y_0 = const()[name = string("op_2652_transpose_y_0"), val = bool(false)];
string xa_k_7_to_fp16_dtype_0 = const()[name = string("xa_k_7_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_7_to_fp16 = cast(dtype = xa_k_7_to_fp16_dtype_0, x = xa_k_7)[name = string("cast_12")];
tensor<fp16, [1, 1, 128, 256]> transpose_127 = transpose(perm = transpose_127_perm_0, x = xa_k_7_to_fp16)[name = string("transpose_188")];
tensor<fp16, [1, 1, 1, 128]> transpose_126 = transpose(perm = transpose_126_perm_0, x = xq_proj_15_cast_fp16)[name = string("transpose_189")];
tensor<fp16, [1, 1, 1, 256]> var_2652_cast_fp16 = matmul(transpose_x = var_2652_transpose_x_0, transpose_y = var_2652_transpose_y_0, x = transpose_126, y = transpose_127)[name = string("op_2652_cast_fp16")];
fp16 var_2653_to_fp16 = const()[name = string("op_2653_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_29_cast_fp16 = mul(x = var_2652_cast_fp16, y = var_2653_to_fp16)[name = string("xscores_29_cast_fp16")];
fp16 var_2671_to_fp16 = const()[name = string("op_2671_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_31_cast_fp16 = select(a = var_2671_to_fp16, b = xscores_29_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_31_cast_fp16")];
int32 var_2673 = const()[name = string("op_2673"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_15_cast_fp16 = softmax(axis = var_2673, x = xscores_31_cast_fp16)[name = string("xprobs_15_cast_fp16")];
bool var_2676_transpose_x_0 = const()[name = string("op_2676_transpose_x_0"), val = bool(false)];
bool var_2676_transpose_y_0 = const()[name = string("op_2676_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_7_to_fp16 = cast(dtype = xa_v_7_to_fp16_dtype_0, x = xa_v_7)[name = string("cast_11")];
tensor<fp16, [1, 1, 256, 128]> xvT_15_cast_fp16 = transpose(perm = var_2650, x = xa_v_7_to_fp16)[name = string("transpose_187")];
tensor<fp16, [1, 1, 1, 128]> var_2676_cast_fp16 = matmul(transpose_x = var_2676_transpose_x_0, transpose_y = var_2676_transpose_y_0, x = xprobs_15_cast_fp16, y = xvT_15_cast_fp16)[name = string("op_2676_cast_fp16")];
tensor<int32, [4]> var_2681 = const()[name = string("op_2681"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2686 = const()[name = string("op_2686"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_2682_cast_fp16 = transpose(perm = var_2681, x = var_2676_cast_fp16)[name = string("transpose_186")];
tensor<fp16, [1, 1, 128]> input_111_cast_fp16 = reshape(shape = var_2686, x = var_2682_cast_fp16)[name = string("input_111_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55122368))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55220736))))[name = string("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_31_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_113_cast_fp16 = add(x = input_107_cast_fp16, y = linear_31_cast_fp16)[name = string("input_113_cast_fp16")];
tensor<int32, [1]> x_57_axes_0 = const()[name = string("x_57_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55222336)))];
fp16 var_2694_to_fp16 = const()[name = string("op_2694_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_57_cast_fp16 = layer_norm(axes = x_57_axes_0, epsilon = var_2694_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = string("x_57_cast_fp16")];
tensor<int32, [3]> var_2710 = const()[name = string("op_2710"), val = tensor<int32, [3]>([0, 2, 1])];
string y_29_pad_type_0 = const()[name = string("y_29_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_29_strides_0 = const()[name = string("y_29_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_29_pad_0 = const()[name = string("y_29_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_29_dilations_0 = const()[name = string("y_29_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_29_groups_0 = const()[name = string("y_29_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55223936))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57583296))))[name = string("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_59_cast_fp16 = transpose(perm = var_2710, x = x_57_cast_fp16)[name = string("transpose_185")];
tensor<fp16, [1, 3072, 1]> y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = string("y_29_cast_fp16")];
string x_61_mode_0 = const()[name = string("x_61_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_29_cast_fp16)[name = string("x_61_cast_fp16")];
string y_31_pad_type_0 = const()[name = string("y_31_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_31_strides_0 = const()[name = string("y_31_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_31_pad_0 = const()[name = string("y_31_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_31_dilations_0 = const()[name = string("y_31_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_31_groups_0 = const()[name = string("y_31_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57589504))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59948864))))[name = string("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_31_cast_fp16 = conv(dilations = y_31_dilations_0, groups = y_31_groups_0, pad = y_31_pad_0, pad_type = y_31_pad_type_0, strides = y_31_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = string("y_31_cast_fp16")];
tensor<int32, [3]> var_2728 = const()[name = string("op_2728"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_2729_cast_fp16 = transpose(perm = var_2728, x = y_31_cast_fp16)[name = string("transpose_184")];
tensor<fp16, [1, 1, 768]> input_115_cast_fp16 = add(x = input_113_cast_fp16, y = var_2729_cast_fp16)[name = string("input_115_cast_fp16")];
tensor<int32, [1]> input_117_axes_0 = const()[name = string("input_117_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_8_norm_self_weight_to_fp16 = const()[name = string("dec_layers_8_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59950464)))];
fp16 var_2733_to_fp16 = const()[name = string("op_2733_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_2733_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = input_115_cast_fp16)[name = string("input_117_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59952064))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61721600))))[name = string("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = string("linear_32_cast_fp16")];
tensor<int32, [5]> var_2747 = const()[name = string("op_2747"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_35_cast_fp16 = reshape(shape = var_2747, x = linear_32_cast_fp16)[name = string("qkv_35_cast_fp16")];
tensor<int32, [5]> q_17_begin_0 = const()[name = string("q_17_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_17_end_0 = const()[name = string("q_17_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_17_end_mask_0 = const()[name = string("q_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_17_squeeze_mask_0 = const()[name = string("q_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = string("q_17_cast_fp16")];
tensor<int32, [5]> new_k_17_begin_0 = const()[name = string("new_k_17_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_17_end_0 = const()[name = string("new_k_17_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_17_end_mask_0 = const()[name = string("new_k_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_17_squeeze_mask_0 = const()[name = string("new_k_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = string("new_k_17_cast_fp16")];
tensor<int32, [5]> new_v_17_begin_0 = const()[name = string("new_v_17_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_17_end_0 = const()[name = string("new_v_17_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_17_end_mask_0 = const()[name = string("new_v_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_17_squeeze_mask_0 = const()[name = string("new_v_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = string("new_v_17_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_16 = read_state(input = sa_k_8)[name = string("read_state_16")];
tensor<fp16, [1, 600, 12, 64]> var_2816_cast_fp16 = sub(x = new_k_17_cast_fp16, y = read_state_16)[name = string("op_2816_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_2817_cast_fp16 = mul(x = var_2816_cast_fp16, y = write_oh_b_1)[name = string("op_2817_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_35_cast_fp16 = add(x = read_state_16, y = var_2817_cast_fp16)[name = string("sa_k_buf_35_cast_fp16")];
write_state(data = sa_k_buf_35_cast_fp16, input = sa_k_8)[name = string("coreml_update_state_40_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_40 = read_state(input = sa_k_8)[name = string("coreml_update_state_40")];
tensor<fp16, [1, 600, 12, 64]> read_state_17 = read_state(input = sa_v_8)[name = string("read_state_17")];
tensor<fp16, [1, 600, 12, 64]> var_2821_cast_fp16 = sub(x = new_v_17_cast_fp16, y = read_state_17)[name = string("op_2821_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_2822_cast_fp16 = mul(x = var_2821_cast_fp16, y = write_oh_b_1)[name = string("op_2822_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_35_cast_fp16 = add(x = read_state_17, y = var_2822_cast_fp16)[name = string("sa_v_buf_35_cast_fp16")];
write_state(data = sa_v_buf_35_cast_fp16, input = sa_v_8)[name = string("coreml_update_state_41_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_41 = read_state(input = sa_v_8)[name = string("coreml_update_state_41")];
tensor<int32, [4]> var_2851 = const()[name = string("op_2851"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_2853_transpose_x_0 = const()[name = string("op_2853_transpose_x_0"), val = bool(false)];
bool var_2853_transpose_y_0 = const()[name = string("op_2853_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_129 = transpose(perm = transpose_129_perm_0, x = coreml_update_state_40)[name = string("transpose_182")];
tensor<fp16, [1, 12, 1, 64]> transpose_128 = transpose(perm = transpose_128_perm_0, x = q_17_cast_fp16)[name = string("transpose_183")];
tensor<fp16, [1, 12, 1, 600]> var_2853_cast_fp16 = matmul(transpose_x = var_2853_transpose_x_0, transpose_y = var_2853_transpose_y_0, x = transpose_128, y = transpose_129)[name = string("op_2853_cast_fp16")];
fp16 var_2854_to_fp16 = const()[name = string("op_2854_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_33_cast_fp16 = mul(x = var_2853_cast_fp16, y = var_2854_to_fp16)[name = string("scores_33_cast_fp16")];
fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_35_cast_fp16 = select(a = var_2872_to_fp16, b = scores_33_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_35_cast_fp16")];
int32 var_2874 = const()[name = string("op_2874"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_17_cast_fp16 = softmax(axis = var_2874, x = scores_35_cast_fp16)[name = string("probs_17_cast_fp16")];
bool var_2877_transpose_x_0 = const()[name = string("op_2877_transpose_x_0"), val = bool(false)];
bool var_2877_transpose_y_0 = const()[name = string("op_2877_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_17_cast_fp16 = transpose(perm = var_2851, x = coreml_update_state_41)[name = string("transpose_181")];
tensor<fp16, [1, 12, 1, 64]> var_2877_cast_fp16 = matmul(transpose_x = var_2877_transpose_x_0, transpose_y = var_2877_transpose_y_0, x = probs_17_cast_fp16, y = v_t_17_cast_fp16)[name = string("op_2877_cast_fp16")];
tensor<int32, [4]> var_2882 = const()[name = string("op_2882"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2887 = const()[name = string("op_2887"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_2883_cast_fp16 = transpose(perm = var_2882, x = var_2877_cast_fp16)[name = string("transpose_180")];
tensor<fp16, [1, 1, 768]> input_119_cast_fp16 = reshape(shape = var_2887, x = var_2883_cast_fp16)[name = string("input_119_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61726272))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62316160))))[name = string("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_33_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_121_cast_fp16 = add(x = input_115_cast_fp16, y = linear_33_cast_fp16)[name = string("input_121_cast_fp16")];
tensor<int32, [1]> input_123_axes_0 = const()[name = string("input_123_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62317760)))];
fp16 var_2895_to_fp16 = const()[name = string("op_2895_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, epsilon = var_2895_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = string("input_123_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62319360))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62417728))))[name = string("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_34_cast_fp16")];
tensor<int32, [4]> var_2908 = const()[name = string("op_2908"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_17_cast_fp16 = reshape(shape = var_2908, x = linear_34_cast_fp16)[name = string("xq_proj_17_cast_fp16")];
tensor<int32, [4]> var_2926 = const()[name = string("op_2926"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_8_to_fp16_dtype_0 = const()[name = string("xa_v_8_to_fp16_dtype_0"), val = string("fp16")];
bool var_2928_transpose_x_0 = const()[name = string("op_2928_transpose_x_0"), val = bool(false)];
bool var_2928_transpose_y_0 = const()[name = string("op_2928_transpose_y_0"), val = bool(false)];
string xa_k_8_to_fp16_dtype_0 = const()[name = string("xa_k_8_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_8_to_fp16 = cast(dtype = xa_k_8_to_fp16_dtype_0, x = xa_k_8)[name = string("cast_10")];
tensor<fp16, [1, 1, 128, 256]> transpose_131 = transpose(perm = transpose_131_perm_0, x = xa_k_8_to_fp16)[name = string("transpose_178")];
tensor<fp16, [1, 1, 1, 128]> transpose_130 = transpose(perm = transpose_130_perm_0, x = xq_proj_17_cast_fp16)[name = string("transpose_179")];
tensor<fp16, [1, 1, 1, 256]> var_2928_cast_fp16 = matmul(transpose_x = var_2928_transpose_x_0, transpose_y = var_2928_transpose_y_0, x = transpose_130, y = transpose_131)[name = string("op_2928_cast_fp16")];
fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_33_cast_fp16 = mul(x = var_2928_cast_fp16, y = var_2929_to_fp16)[name = string("xscores_33_cast_fp16")];
fp16 var_2947_to_fp16 = const()[name = string("op_2947_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_35_cast_fp16 = select(a = var_2947_to_fp16, b = xscores_33_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_35_cast_fp16")];
int32 var_2949 = const()[name = string("op_2949"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_17_cast_fp16 = softmax(axis = var_2949, x = xscores_35_cast_fp16)[name = string("xprobs_17_cast_fp16")];
bool var_2952_transpose_x_0 = const()[name = string("op_2952_transpose_x_0"), val = bool(false)];
bool var_2952_transpose_y_0 = const()[name = string("op_2952_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_8_to_fp16 = cast(dtype = xa_v_8_to_fp16_dtype_0, x = xa_v_8)[name = string("cast_9")];
tensor<fp16, [1, 1, 256, 128]> xvT_17_cast_fp16 = transpose(perm = var_2926, x = xa_v_8_to_fp16)[name = string("transpose_177")];
tensor<fp16, [1, 1, 1, 128]> var_2952_cast_fp16 = matmul(transpose_x = var_2952_transpose_x_0, transpose_y = var_2952_transpose_y_0, x = xprobs_17_cast_fp16, y = xvT_17_cast_fp16)[name = string("op_2952_cast_fp16")];
tensor<int32, [4]> var_2957 = const()[name = string("op_2957"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2962 = const()[name = string("op_2962"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_2958_cast_fp16 = transpose(perm = var_2957, x = var_2952_cast_fp16)[name = string("transpose_176")];
tensor<fp16, [1, 1, 128]> input_125_cast_fp16 = reshape(shape = var_2962, x = var_2958_cast_fp16)[name = string("input_125_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62418048))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62516416))))[name = string("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = string("linear_35_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_127_cast_fp16 = add(x = input_121_cast_fp16, y = linear_35_cast_fp16)[name = string("input_127_cast_fp16")];
tensor<int32, [1]> x_65_axes_0 = const()[name = string("x_65_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62518016)))];
fp16 var_2970_to_fp16 = const()[name = string("op_2970_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_2970_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_127_cast_fp16)[name = string("x_65_cast_fp16")];
tensor<int32, [3]> var_2986 = const()[name = string("op_2986"), val = tensor<int32, [3]>([0, 2, 1])];
string y_33_pad_type_0 = const()[name = string("y_33_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_33_strides_0 = const()[name = string("y_33_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_33_pad_0 = const()[name = string("y_33_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_33_dilations_0 = const()[name = string("y_33_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_33_groups_0 = const()[name = string("y_33_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62519616))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64878976))))[name = string("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_67_cast_fp16 = transpose(perm = var_2986, x = x_65_cast_fp16)[name = string("transpose_175")];
tensor<fp16, [1, 3072, 1]> y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_67_cast_fp16)[name = string("y_33_cast_fp16")];
string x_69_mode_0 = const()[name = string("x_69_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_69_cast_fp16 = gelu(mode = x_69_mode_0, x = y_33_cast_fp16)[name = string("x_69_cast_fp16")];
string y_35_pad_type_0 = const()[name = string("y_35_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_35_strides_0 = const()[name = string("y_35_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_35_pad_0 = const()[name = string("y_35_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_35_dilations_0 = const()[name = string("y_35_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_35_groups_0 = const()[name = string("y_35_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64885184))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67244544))))[name = string("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = string("y_35_cast_fp16")];
tensor<int32, [3]> var_3004 = const()[name = string("op_3004"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_3005_cast_fp16 = transpose(perm = var_3004, x = y_35_cast_fp16)[name = string("transpose_174")];
tensor<fp16, [1, 1, 768]> input_129_cast_fp16 = add(x = input_127_cast_fp16, y = var_3005_cast_fp16)[name = string("input_129_cast_fp16")];
tensor<int32, [1]> input_131_axes_0 = const()[name = string("input_131_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_9_norm_self_weight_to_fp16 = const()[name = string("dec_layers_9_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67246144)))];
fp16 var_3009_to_fp16 = const()[name = string("op_3009_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, epsilon = var_3009_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = input_129_cast_fp16)[name = string("input_131_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67247744))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69017280))))[name = string("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_36_cast_fp16")];
tensor<int32, [5]> var_3023 = const()[name = string("op_3023"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_39_cast_fp16 = reshape(shape = var_3023, x = linear_36_cast_fp16)[name = string("qkv_39_cast_fp16")];
tensor<int32, [5]> q_19_begin_0 = const()[name = string("q_19_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_19_end_0 = const()[name = string("q_19_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_19_end_mask_0 = const()[name = string("q_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_19_squeeze_mask_0 = const()[name = string("q_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_19_cast_fp16 = slice_by_index(begin = q_19_begin_0, end = q_19_end_0, end_mask = q_19_end_mask_0, squeeze_mask = q_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = string("q_19_cast_fp16")];
tensor<int32, [5]> new_k_19_begin_0 = const()[name = string("new_k_19_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_19_end_0 = const()[name = string("new_k_19_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_19_end_mask_0 = const()[name = string("new_k_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_19_squeeze_mask_0 = const()[name = string("new_k_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = string("new_k_19_cast_fp16")];
tensor<int32, [5]> new_v_19_begin_0 = const()[name = string("new_v_19_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_19_end_0 = const()[name = string("new_v_19_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_19_end_mask_0 = const()[name = string("new_v_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_19_squeeze_mask_0 = const()[name = string("new_v_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = string("new_v_19_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_18 = read_state(input = sa_k_9)[name = string("read_state_18")];
tensor<fp16, [1, 600, 12, 64]> var_3092_cast_fp16 = sub(x = new_k_19_cast_fp16, y = read_state_18)[name = string("op_3092_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_3093_cast_fp16 = mul(x = var_3092_cast_fp16, y = write_oh_b_1)[name = string("op_3093_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_39_cast_fp16 = add(x = read_state_18, y = var_3093_cast_fp16)[name = string("sa_k_buf_39_cast_fp16")];
write_state(data = sa_k_buf_39_cast_fp16, input = sa_k_9)[name = string("coreml_update_state_42_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_42 = read_state(input = sa_k_9)[name = string("coreml_update_state_42")];
tensor<fp16, [1, 600, 12, 64]> read_state_19 = read_state(input = sa_v_9)[name = string("read_state_19")];
tensor<fp16, [1, 600, 12, 64]> var_3097_cast_fp16 = sub(x = new_v_19_cast_fp16, y = read_state_19)[name = string("op_3097_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_3098_cast_fp16 = mul(x = var_3097_cast_fp16, y = write_oh_b_1)[name = string("op_3098_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_39_cast_fp16 = add(x = read_state_19, y = var_3098_cast_fp16)[name = string("sa_v_buf_39_cast_fp16")];
write_state(data = sa_v_buf_39_cast_fp16, input = sa_v_9)[name = string("coreml_update_state_43_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_43 = read_state(input = sa_v_9)[name = string("coreml_update_state_43")];
tensor<int32, [4]> var_3127 = const()[name = string("op_3127"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_3129_transpose_x_0 = const()[name = string("op_3129_transpose_x_0"), val = bool(false)];
bool var_3129_transpose_y_0 = const()[name = string("op_3129_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_133 = transpose(perm = transpose_133_perm_0, x = coreml_update_state_42)[name = string("transpose_172")];
tensor<fp16, [1, 12, 1, 64]> transpose_132 = transpose(perm = transpose_132_perm_0, x = q_19_cast_fp16)[name = string("transpose_173")];
tensor<fp16, [1, 12, 1, 600]> var_3129_cast_fp16 = matmul(transpose_x = var_3129_transpose_x_0, transpose_y = var_3129_transpose_y_0, x = transpose_132, y = transpose_133)[name = string("op_3129_cast_fp16")];
fp16 var_3130_to_fp16 = const()[name = string("op_3130_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_37_cast_fp16 = mul(x = var_3129_cast_fp16, y = var_3130_to_fp16)[name = string("scores_37_cast_fp16")];
fp16 var_3148_to_fp16 = const()[name = string("op_3148_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_39_cast_fp16 = select(a = var_3148_to_fp16, b = scores_37_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_39_cast_fp16")];
int32 var_3150 = const()[name = string("op_3150"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_19_cast_fp16 = softmax(axis = var_3150, x = scores_39_cast_fp16)[name = string("probs_19_cast_fp16")];
bool var_3153_transpose_x_0 = const()[name = string("op_3153_transpose_x_0"), val = bool(false)];
bool var_3153_transpose_y_0 = const()[name = string("op_3153_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_19_cast_fp16 = transpose(perm = var_3127, x = coreml_update_state_43)[name = string("transpose_171")];
tensor<fp16, [1, 12, 1, 64]> var_3153_cast_fp16 = matmul(transpose_x = var_3153_transpose_x_0, transpose_y = var_3153_transpose_y_0, x = probs_19_cast_fp16, y = v_t_19_cast_fp16)[name = string("op_3153_cast_fp16")];
tensor<int32, [4]> var_3158 = const()[name = string("op_3158"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_3163 = const()[name = string("op_3163"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_3159_cast_fp16 = transpose(perm = var_3158, x = var_3153_cast_fp16)[name = string("transpose_170")];
tensor<fp16, [1, 1, 768]> input_133_cast_fp16 = reshape(shape = var_3163, x = var_3159_cast_fp16)[name = string("input_133_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69021952))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69611840))))[name = string("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = string("linear_37_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_37_cast_fp16)[name = string("input_135_cast_fp16")];
tensor<int32, [1]> input_137_axes_0 = const()[name = string("input_137_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69613440)))];
fp16 var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, epsilon = var_3171_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_135_cast_fp16)[name = string("input_137_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69615040))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69713408))))[name = string("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = string("linear_38_cast_fp16")];
tensor<int32, [4]> var_3184 = const()[name = string("op_3184"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_19_cast_fp16 = reshape(shape = var_3184, x = linear_38_cast_fp16)[name = string("xq_proj_19_cast_fp16")];
tensor<int32, [4]> var_3202 = const()[name = string("op_3202"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_9_to_fp16_dtype_0 = const()[name = string("xa_v_9_to_fp16_dtype_0"), val = string("fp16")];
bool var_3204_transpose_x_0 = const()[name = string("op_3204_transpose_x_0"), val = bool(false)];
bool var_3204_transpose_y_0 = const()[name = string("op_3204_transpose_y_0"), val = bool(false)];
string xa_k_9_to_fp16_dtype_0 = const()[name = string("xa_k_9_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_9_to_fp16 = cast(dtype = xa_k_9_to_fp16_dtype_0, x = xa_k_9)[name = string("cast_8")];
tensor<fp16, [1, 1, 128, 256]> transpose_135 = transpose(perm = transpose_135_perm_0, x = xa_k_9_to_fp16)[name = string("transpose_168")];
tensor<fp16, [1, 1, 1, 128]> transpose_134 = transpose(perm = transpose_134_perm_0, x = xq_proj_19_cast_fp16)[name = string("transpose_169")];
tensor<fp16, [1, 1, 1, 256]> var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_0, transpose_y = var_3204_transpose_y_0, x = transpose_134, y = transpose_135)[name = string("op_3204_cast_fp16")];
fp16 var_3205_to_fp16 = const()[name = string("op_3205_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_37_cast_fp16 = mul(x = var_3204_cast_fp16, y = var_3205_to_fp16)[name = string("xscores_37_cast_fp16")];
fp16 var_3223_to_fp16 = const()[name = string("op_3223_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_39_cast_fp16 = select(a = var_3223_to_fp16, b = xscores_37_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_39_cast_fp16")];
int32 var_3225 = const()[name = string("op_3225"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_19_cast_fp16 = softmax(axis = var_3225, x = xscores_39_cast_fp16)[name = string("xprobs_19_cast_fp16")];
bool var_3228_transpose_x_0 = const()[name = string("op_3228_transpose_x_0"), val = bool(false)];
bool var_3228_transpose_y_0 = const()[name = string("op_3228_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_9_to_fp16 = cast(dtype = xa_v_9_to_fp16_dtype_0, x = xa_v_9)[name = string("cast_7")];
tensor<fp16, [1, 1, 256, 128]> xvT_19_cast_fp16 = transpose(perm = var_3202, x = xa_v_9_to_fp16)[name = string("transpose_167")];
tensor<fp16, [1, 1, 1, 128]> var_3228_cast_fp16 = matmul(transpose_x = var_3228_transpose_x_0, transpose_y = var_3228_transpose_y_0, x = xprobs_19_cast_fp16, y = xvT_19_cast_fp16)[name = string("op_3228_cast_fp16")];
tensor<int32, [4]> var_3233 = const()[name = string("op_3233"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_3238 = const()[name = string("op_3238"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_3234_cast_fp16 = transpose(perm = var_3233, x = var_3228_cast_fp16)[name = string("transpose_166")];
tensor<fp16, [1, 1, 128]> input_139_cast_fp16 = reshape(shape = var_3238, x = var_3234_cast_fp16)[name = string("input_139_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69713728))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69812096))))[name = string("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_39_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_141_cast_fp16 = add(x = input_135_cast_fp16, y = linear_39_cast_fp16)[name = string("input_141_cast_fp16")];
tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69813696)))];
fp16 var_3246_to_fp16 = const()[name = string("op_3246_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_3246_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_141_cast_fp16)[name = string("x_73_cast_fp16")];
tensor<int32, [3]> var_3262 = const()[name = string("op_3262"), val = tensor<int32, [3]>([0, 2, 1])];
string y_37_pad_type_0 = const()[name = string("y_37_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_37_strides_0 = const()[name = string("y_37_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_37_pad_0 = const()[name = string("y_37_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_37_dilations_0 = const()[name = string("y_37_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_37_groups_0 = const()[name = string("y_37_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69815296))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72174656))))[name = string("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_75_cast_fp16 = transpose(perm = var_3262, x = x_73_cast_fp16)[name = string("transpose_165")];
tensor<fp16, [1, 3072, 1]> y_37_cast_fp16 = conv(dilations = y_37_dilations_0, groups = y_37_groups_0, pad = y_37_pad_0, pad_type = y_37_pad_type_0, strides = y_37_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = string("y_37_cast_fp16")];
string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_37_cast_fp16)[name = string("x_77_cast_fp16")];
string y_39_pad_type_0 = const()[name = string("y_39_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_39_strides_0 = const()[name = string("y_39_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_39_pad_0 = const()[name = string("y_39_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_39_dilations_0 = const()[name = string("y_39_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_39_groups_0 = const()[name = string("y_39_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72180864))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74540224))))[name = string("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = string("y_39_cast_fp16")];
tensor<int32, [3]> var_3280 = const()[name = string("op_3280"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_3281_cast_fp16 = transpose(perm = var_3280, x = y_39_cast_fp16)[name = string("transpose_164")];
tensor<fp16, [1, 1, 768]> input_143_cast_fp16 = add(x = input_141_cast_fp16, y = var_3281_cast_fp16)[name = string("input_143_cast_fp16")];
tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_10_norm_self_weight_to_fp16 = const()[name = string("dec_layers_10_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74541824)))];
fp16 var_3285_to_fp16 = const()[name = string("op_3285_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, epsilon = var_3285_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74543424))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76312960))))[name = string("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = string("linear_40_cast_fp16")];
tensor<int32, [5]> var_3299 = const()[name = string("op_3299"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_43_cast_fp16 = reshape(shape = var_3299, x = linear_40_cast_fp16)[name = string("qkv_43_cast_fp16")];
tensor<int32, [5]> q_21_begin_0 = const()[name = string("q_21_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_21_end_0 = const()[name = string("q_21_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_21_end_mask_0 = const()[name = string("q_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_21_squeeze_mask_0 = const()[name = string("q_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = string("q_21_cast_fp16")];
tensor<int32, [5]> new_k_21_begin_0 = const()[name = string("new_k_21_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_21_end_0 = const()[name = string("new_k_21_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_21_end_mask_0 = const()[name = string("new_k_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_21_squeeze_mask_0 = const()[name = string("new_k_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = string("new_k_21_cast_fp16")];
tensor<int32, [5]> new_v_21_begin_0 = const()[name = string("new_v_21_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_21_end_0 = const()[name = string("new_v_21_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_21_end_mask_0 = const()[name = string("new_v_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_21_squeeze_mask_0 = const()[name = string("new_v_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = string("new_v_21_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_20 = read_state(input = sa_k_10)[name = string("read_state_20")];
tensor<fp16, [1, 600, 12, 64]> var_3368_cast_fp16 = sub(x = new_k_21_cast_fp16, y = read_state_20)[name = string("op_3368_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_3369_cast_fp16 = mul(x = var_3368_cast_fp16, y = write_oh_b_1)[name = string("op_3369_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_43_cast_fp16 = add(x = read_state_20, y = var_3369_cast_fp16)[name = string("sa_k_buf_43_cast_fp16")];
write_state(data = sa_k_buf_43_cast_fp16, input = sa_k_10)[name = string("coreml_update_state_44_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_44 = read_state(input = sa_k_10)[name = string("coreml_update_state_44")];
tensor<fp16, [1, 600, 12, 64]> read_state_21 = read_state(input = sa_v_10)[name = string("read_state_21")];
tensor<fp16, [1, 600, 12, 64]> var_3373_cast_fp16 = sub(x = new_v_21_cast_fp16, y = read_state_21)[name = string("op_3373_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_3374_cast_fp16 = mul(x = var_3373_cast_fp16, y = write_oh_b_1)[name = string("op_3374_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_43_cast_fp16 = add(x = read_state_21, y = var_3374_cast_fp16)[name = string("sa_v_buf_43_cast_fp16")];
write_state(data = sa_v_buf_43_cast_fp16, input = sa_v_10)[name = string("coreml_update_state_45_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_45 = read_state(input = sa_v_10)[name = string("coreml_update_state_45")];
tensor<int32, [4]> var_3403 = const()[name = string("op_3403"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_3405_transpose_x_0 = const()[name = string("op_3405_transpose_x_0"), val = bool(false)];
bool var_3405_transpose_y_0 = const()[name = string("op_3405_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_137 = transpose(perm = transpose_137_perm_0, x = coreml_update_state_44)[name = string("transpose_162")];
tensor<fp16, [1, 12, 1, 64]> transpose_136 = transpose(perm = transpose_136_perm_0, x = q_21_cast_fp16)[name = string("transpose_163")];
tensor<fp16, [1, 12, 1, 600]> var_3405_cast_fp16 = matmul(transpose_x = var_3405_transpose_x_0, transpose_y = var_3405_transpose_y_0, x = transpose_136, y = transpose_137)[name = string("op_3405_cast_fp16")];
fp16 var_3406_to_fp16 = const()[name = string("op_3406_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_41_cast_fp16 = mul(x = var_3405_cast_fp16, y = var_3406_to_fp16)[name = string("scores_41_cast_fp16")];
fp16 var_3424_to_fp16 = const()[name = string("op_3424_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_43_cast_fp16 = select(a = var_3424_to_fp16, b = scores_41_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_43_cast_fp16")];
int32 var_3426 = const()[name = string("op_3426"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_21_cast_fp16 = softmax(axis = var_3426, x = scores_43_cast_fp16)[name = string("probs_21_cast_fp16")];
bool var_3429_transpose_x_0 = const()[name = string("op_3429_transpose_x_0"), val = bool(false)];
bool var_3429_transpose_y_0 = const()[name = string("op_3429_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_21_cast_fp16 = transpose(perm = var_3403, x = coreml_update_state_45)[name = string("transpose_161")];
tensor<fp16, [1, 12, 1, 64]> var_3429_cast_fp16 = matmul(transpose_x = var_3429_transpose_x_0, transpose_y = var_3429_transpose_y_0, x = probs_21_cast_fp16, y = v_t_21_cast_fp16)[name = string("op_3429_cast_fp16")];
tensor<int32, [4]> var_3434 = const()[name = string("op_3434"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_3439 = const()[name = string("op_3439"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_3435_cast_fp16 = transpose(perm = var_3434, x = var_3429_cast_fp16)[name = string("transpose_160")];
tensor<fp16, [1, 1, 768]> input_147_cast_fp16 = reshape(shape = var_3439, x = var_3435_cast_fp16)[name = string("input_147_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76317632))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76907520))))[name = string("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_41_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_41_cast_fp16)[name = string("input_149_cast_fp16")];
tensor<int32, [1]> input_151_axes_0 = const()[name = string("input_151_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76909120)))];
fp16 var_3447_to_fp16 = const()[name = string("op_3447_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_151_cast_fp16 = layer_norm(axes = input_151_axes_0, epsilon = var_3447_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_149_cast_fp16)[name = string("input_151_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76910720))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77009088))))[name = string("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = string("linear_42_cast_fp16")];
tensor<int32, [4]> var_3460 = const()[name = string("op_3460"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_21_cast_fp16 = reshape(shape = var_3460, x = linear_42_cast_fp16)[name = string("xq_proj_21_cast_fp16")];
tensor<int32, [4]> var_3478 = const()[name = string("op_3478"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_10_to_fp16_dtype_0 = const()[name = string("xa_v_10_to_fp16_dtype_0"), val = string("fp16")];
bool var_3480_transpose_x_0 = const()[name = string("op_3480_transpose_x_0"), val = bool(false)];
bool var_3480_transpose_y_0 = const()[name = string("op_3480_transpose_y_0"), val = bool(false)];
string xa_k_10_to_fp16_dtype_0 = const()[name = string("xa_k_10_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_10_to_fp16 = cast(dtype = xa_k_10_to_fp16_dtype_0, x = xa_k_10)[name = string("cast_6")];
tensor<fp16, [1, 1, 128, 256]> transpose_139 = transpose(perm = transpose_139_perm_0, x = xa_k_10_to_fp16)[name = string("transpose_158")];
tensor<fp16, [1, 1, 1, 128]> transpose_138 = transpose(perm = transpose_138_perm_0, x = xq_proj_21_cast_fp16)[name = string("transpose_159")];
tensor<fp16, [1, 1, 1, 256]> var_3480_cast_fp16 = matmul(transpose_x = var_3480_transpose_x_0, transpose_y = var_3480_transpose_y_0, x = transpose_138, y = transpose_139)[name = string("op_3480_cast_fp16")];
fp16 var_3481_to_fp16 = const()[name = string("op_3481_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_41_cast_fp16 = mul(x = var_3480_cast_fp16, y = var_3481_to_fp16)[name = string("xscores_41_cast_fp16")];
fp16 var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_43_cast_fp16 = select(a = var_3499_to_fp16, b = xscores_41_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_43_cast_fp16")];
int32 var_3501 = const()[name = string("op_3501"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_21_cast_fp16 = softmax(axis = var_3501, x = xscores_43_cast_fp16)[name = string("xprobs_21_cast_fp16")];
bool var_3504_transpose_x_0 = const()[name = string("op_3504_transpose_x_0"), val = bool(false)];
bool var_3504_transpose_y_0 = const()[name = string("op_3504_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_10_to_fp16 = cast(dtype = xa_v_10_to_fp16_dtype_0, x = xa_v_10)[name = string("cast_5")];
tensor<fp16, [1, 1, 256, 128]> xvT_21_cast_fp16 = transpose(perm = var_3478, x = xa_v_10_to_fp16)[name = string("transpose_157")];
tensor<fp16, [1, 1, 1, 128]> var_3504_cast_fp16 = matmul(transpose_x = var_3504_transpose_x_0, transpose_y = var_3504_transpose_y_0, x = xprobs_21_cast_fp16, y = xvT_21_cast_fp16)[name = string("op_3504_cast_fp16")];
tensor<int32, [4]> var_3509 = const()[name = string("op_3509"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_3514 = const()[name = string("op_3514"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_3510_cast_fp16 = transpose(perm = var_3509, x = var_3504_cast_fp16)[name = string("transpose_156")];
tensor<fp16, [1, 1, 128]> input_153_cast_fp16 = reshape(shape = var_3514, x = var_3510_cast_fp16)[name = string("input_153_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77009408))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77107776))))[name = string("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = string("linear_43_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_43_cast_fp16)[name = string("input_155_cast_fp16")];
tensor<int32, [1]> x_81_axes_0 = const()[name = string("x_81_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77109376)))];
fp16 var_3522_to_fp16 = const()[name = string("op_3522_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_3522_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_155_cast_fp16)[name = string("x_81_cast_fp16")];
tensor<int32, [3]> var_3538 = const()[name = string("op_3538"), val = tensor<int32, [3]>([0, 2, 1])];
string y_41_pad_type_0 = const()[name = string("y_41_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_41_strides_0 = const()[name = string("y_41_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_41_pad_0 = const()[name = string("y_41_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_41_dilations_0 = const()[name = string("y_41_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_41_groups_0 = const()[name = string("y_41_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77110976))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79470336))))[name = string("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_83_cast_fp16 = transpose(perm = var_3538, x = x_81_cast_fp16)[name = string("transpose_155")];
tensor<fp16, [1, 3072, 1]> y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_83_cast_fp16)[name = string("y_41_cast_fp16")];
string x_85_mode_0 = const()[name = string("x_85_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_85_cast_fp16 = gelu(mode = x_85_mode_0, x = y_41_cast_fp16)[name = string("x_85_cast_fp16")];
string y_43_pad_type_0 = const()[name = string("y_43_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_43_strides_0 = const()[name = string("y_43_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_43_pad_0 = const()[name = string("y_43_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_43_dilations_0 = const()[name = string("y_43_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_43_groups_0 = const()[name = string("y_43_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79476544))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81835904))))[name = string("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_43_cast_fp16 = conv(dilations = y_43_dilations_0, groups = y_43_groups_0, pad = y_43_pad_0, pad_type = y_43_pad_type_0, strides = y_43_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = string("y_43_cast_fp16")];
tensor<int32, [3]> var_3556 = const()[name = string("op_3556"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_3557_cast_fp16 = transpose(perm = var_3556, x = y_43_cast_fp16)[name = string("transpose_154")];
tensor<fp16, [1, 1, 768]> input_157_cast_fp16 = add(x = input_155_cast_fp16, y = var_3557_cast_fp16)[name = string("input_157_cast_fp16")];
tensor<int32, [1]> input_159_axes_0 = const()[name = string("input_159_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_11_norm_self_weight_to_fp16 = const()[name = string("dec_layers_11_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81837504)))];
fp16 var_3561_to_fp16 = const()[name = string("op_3561_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, epsilon = var_3561_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = input_157_cast_fp16)[name = string("input_159_cast_fp16")];
tensor<fp16, [2304, 768]> dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [2304, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81839104))), scale = tensor<fp16, [2304, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83608640))))[name = string("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 2304]> linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = string("linear_44_cast_fp16")];
tensor<int32, [5]> var_3575 = const()[name = string("op_3575"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<fp16, [1, 1, 3, 12, 64]> qkv_cast_fp16 = reshape(shape = var_3575, x = linear_44_cast_fp16)[name = string("qkv_cast_fp16")];
tensor<int32, [5]> q_begin_0 = const()[name = string("q_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
tensor<int32, [5]> q_end_0 = const()[name = string("q_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
tensor<bool, [5]> q_end_mask_0 = const()[name = string("q_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> q_squeeze_mask_0 = const()[name = string("q_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> q_cast_fp16 = slice_by_index(begin = q_begin_0, end = q_end_0, end_mask = q_end_mask_0, squeeze_mask = q_squeeze_mask_0, x = qkv_cast_fp16)[name = string("q_cast_fp16")];
tensor<int32, [5]> new_k_begin_0 = const()[name = string("new_k_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
tensor<int32, [5]> new_k_end_0 = const()[name = string("new_k_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
tensor<bool, [5]> new_k_end_mask_0 = const()[name = string("new_k_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_k_squeeze_mask_0 = const()[name = string("new_k_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = string("new_k_cast_fp16")];
tensor<int32, [5]> new_v_begin_0 = const()[name = string("new_v_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
tensor<int32, [5]> new_v_end_0 = const()[name = string("new_v_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
tensor<bool, [5]> new_v_end_mask_0 = const()[name = string("new_v_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
tensor<bool, [5]> new_v_squeeze_mask_0 = const()[name = string("new_v_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
tensor<fp16, [1, 1, 12, 64]> new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = string("new_v_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> read_state_22 = read_state(input = sa_k_11)[name = string("read_state_22")];
tensor<fp16, [1, 600, 12, 64]> var_3644_cast_fp16 = sub(x = new_k_cast_fp16, y = read_state_22)[name = string("op_3644_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_3645_cast_fp16 = mul(x = var_3644_cast_fp16, y = write_oh_b_1)[name = string("op_3645_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_k_buf_cast_fp16 = add(x = read_state_22, y = var_3645_cast_fp16)[name = string("sa_k_buf_cast_fp16")];
write_state(data = sa_k_buf_cast_fp16, input = sa_k_11)[name = string("coreml_update_state_46_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_46 = read_state(input = sa_k_11)[name = string("coreml_update_state_46")];
tensor<fp16, [1, 600, 12, 64]> read_state_23 = read_state(input = sa_v_11)[name = string("read_state_23")];
tensor<fp16, [1, 600, 12, 64]> var_3649_cast_fp16 = sub(x = new_v_cast_fp16, y = read_state_23)[name = string("op_3649_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> var_3650_cast_fp16 = mul(x = var_3649_cast_fp16, y = write_oh_b_1)[name = string("op_3650_cast_fp16")];
tensor<fp16, [1, 600, 12, 64]> sa_v_buf_cast_fp16 = add(x = read_state_23, y = var_3650_cast_fp16)[name = string("sa_v_buf_cast_fp16")];
write_state(data = sa_v_buf_cast_fp16, input = sa_v_11)[name = string("coreml_update_state_47_write_state")];
tensor<fp16, [1, 600, 12, 64]> coreml_update_state_47 = read_state(input = sa_v_11)[name = string("coreml_update_state_47")];
tensor<int32, [4]> var_3679 = const()[name = string("op_3679"), val = tensor<int32, [4]>([0, 2, -3, -1])];
bool var_3681_transpose_x_0 = const()[name = string("op_3681_transpose_x_0"), val = bool(false)];
bool var_3681_transpose_y_0 = const()[name = string("op_3681_transpose_y_0"), val = bool(false)];
tensor<int32, [4]> transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 12, 64, 600]> transpose_141 = transpose(perm = transpose_141_perm_0, x = coreml_update_state_46)[name = string("transpose_152")];
tensor<fp16, [1, 12, 1, 64]> transpose_140 = transpose(perm = transpose_140_perm_0, x = q_cast_fp16)[name = string("transpose_153")];
tensor<fp16, [1, 12, 1, 600]> var_3681_cast_fp16 = matmul(transpose_x = var_3681_transpose_x_0, transpose_y = var_3681_transpose_y_0, x = transpose_140, y = transpose_141)[name = string("op_3681_cast_fp16")];
fp16 var_3682_to_fp16 = const()[name = string("op_3682_to_fp16"), val = fp16(0x1p-3)];
tensor<fp16, [1, 12, 1, 600]> scores_45_cast_fp16 = mul(x = var_3681_cast_fp16, y = var_3682_to_fp16)[name = string("scores_45_cast_fp16")];
fp16 var_3700_to_fp16 = const()[name = string("op_3700_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 12, 1, 600]> scores_cast_fp16 = select(a = var_3700_to_fp16, b = scores_45_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_cast_fp16")];
int32 var_3702 = const()[name = string("op_3702"), val = int32(-1)];
tensor<fp16, [1, 12, 1, 600]> probs_cast_fp16 = softmax(axis = var_3702, x = scores_cast_fp16)[name = string("probs_cast_fp16")];
bool var_3705_transpose_x_0 = const()[name = string("op_3705_transpose_x_0"), val = bool(false)];
bool var_3705_transpose_y_0 = const()[name = string("op_3705_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 12, 600, 64]> v_t_cast_fp16 = transpose(perm = var_3679, x = coreml_update_state_47)[name = string("transpose_151")];
tensor<fp16, [1, 12, 1, 64]> var_3705_cast_fp16 = matmul(transpose_x = var_3705_transpose_x_0, transpose_y = var_3705_transpose_y_0, x = probs_cast_fp16, y = v_t_cast_fp16)[name = string("op_3705_cast_fp16")];
tensor<int32, [4]> var_3710 = const()[name = string("op_3710"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_3715 = const()[name = string("op_3715"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 12, 64]> var_3711_cast_fp16 = transpose(perm = var_3710, x = var_3705_cast_fp16)[name = string("transpose_150")];
tensor<fp16, [1, 1, 768]> input_161_cast_fp16 = reshape(shape = var_3715, x = var_3711_cast_fp16)[name = string("input_161_cast_fp16")];
tensor<fp16, [768, 768]> dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83613312))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84203200))))[name = string("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = string("linear_45_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_163_cast_fp16 = add(x = input_157_cast_fp16, y = linear_45_cast_fp16)[name = string("input_163_cast_fp16")];
tensor<int32, [1]> input_165_axes_0 = const()[name = string("input_165_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84204800)))];
fp16 var_3723_to_fp16 = const()[name = string("op_3723_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_165_cast_fp16 = layer_norm(axes = input_165_axes_0, epsilon = var_3723_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_163_cast_fp16)[name = string("input_165_cast_fp16")];
tensor<fp16, [128, 768]> dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [128, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84206400))), scale = tensor<fp16, [128, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84304768))))[name = string("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 128]> linear_46_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = string("linear_46_cast_fp16")];
tensor<int32, [4]> var_3736 = const()[name = string("op_3736"), val = tensor<int32, [4]>([1, 1, 1, 128])];
tensor<fp16, [1, 1, 1, 128]> xq_proj_cast_fp16 = reshape(shape = var_3736, x = linear_46_cast_fp16)[name = string("xq_proj_cast_fp16")];
tensor<int32, [4]> var_3754 = const()[name = string("op_3754"), val = tensor<int32, [4]>([0, 2, -3, -1])];
string xa_v_11_to_fp16_dtype_0 = const()[name = string("xa_v_11_to_fp16_dtype_0"), val = string("fp16")];
bool var_3756_transpose_x_0 = const()[name = string("op_3756_transpose_x_0"), val = bool(false)];
bool var_3756_transpose_y_0 = const()[name = string("op_3756_transpose_y_0"), val = bool(false)];
string xa_k_11_to_fp16_dtype_0 = const()[name = string("xa_k_11_to_fp16_dtype_0"), val = string("fp16")];
tensor<int32, [4]> transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
tensor<int32, [4]> transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
tensor<fp16, [1, 256, 1, 128]> xa_k_11_to_fp16 = cast(dtype = xa_k_11_to_fp16_dtype_0, x = xa_k_11)[name = string("cast_4")];
tensor<fp16, [1, 1, 128, 256]> transpose_143 = transpose(perm = transpose_143_perm_0, x = xa_k_11_to_fp16)[name = string("transpose_148")];
tensor<fp16, [1, 1, 1, 128]> transpose_142 = transpose(perm = transpose_142_perm_0, x = xq_proj_cast_fp16)[name = string("transpose_149")];
tensor<fp16, [1, 1, 1, 256]> var_3756_cast_fp16 = matmul(transpose_x = var_3756_transpose_x_0, transpose_y = var_3756_transpose_y_0, x = transpose_142, y = transpose_143)[name = string("op_3756_cast_fp16")];
fp16 var_3757_to_fp16 = const()[name = string("op_3757_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 1, 1, 256]> xscores_45_cast_fp16 = mul(x = var_3756_cast_fp16, y = var_3757_to_fp16)[name = string("xscores_45_cast_fp16")];
fp16 var_3775_to_fp16 = const()[name = string("op_3775_to_fp16"), val = fp16(-inf)];
tensor<fp16, [1, 1, 1, 256]> xscores_cast_fp16 = select(a = var_3775_to_fp16, b = xscores_45_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_cast_fp16")];
int32 var_3777 = const()[name = string("op_3777"), val = int32(-1)];
tensor<fp16, [1, 1, 1, 256]> xprobs_cast_fp16 = softmax(axis = var_3777, x = xscores_cast_fp16)[name = string("xprobs_cast_fp16")];
bool var_3780_transpose_x_0 = const()[name = string("op_3780_transpose_x_0"), val = bool(false)];
bool var_3780_transpose_y_0 = const()[name = string("op_3780_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 256, 1, 128]> xa_v_11_to_fp16 = cast(dtype = xa_v_11_to_fp16_dtype_0, x = xa_v_11)[name = string("cast_3")];
tensor<fp16, [1, 1, 256, 128]> xvT_cast_fp16 = transpose(perm = var_3754, x = xa_v_11_to_fp16)[name = string("transpose_147")];
tensor<fp16, [1, 1, 1, 128]> var_3780_cast_fp16 = matmul(transpose_x = var_3780_transpose_x_0, transpose_y = var_3780_transpose_y_0, x = xprobs_cast_fp16, y = xvT_cast_fp16)[name = string("op_3780_cast_fp16")];
tensor<int32, [4]> var_3785 = const()[name = string("op_3785"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_3790 = const()[name = string("op_3790"), val = tensor<int32, [3]>([1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> var_3786_cast_fp16 = transpose(perm = var_3785, x = var_3780_cast_fp16)[name = string("transpose_146")];
tensor<fp16, [1, 1, 128]> input_167_cast_fp16 = reshape(shape = var_3790, x = var_3786_cast_fp16)[name = string("input_167_cast_fp16")];
tensor<fp16, [768, 128]> dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84305088))), scale = tensor<fp16, [768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84403456))))[name = string("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 1, 768]> linear_47_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = string("linear_47_cast_fp16")];
tensor<fp16, [1, 1, 768]> input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_47_cast_fp16)[name = string("input_169_cast_fp16")];
tensor<int32, [1]> x_89_axes_0 = const()[name = string("x_89_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84405056)))];
fp16 var_3798_to_fp16 = const()[name = string("op_3798_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> x_89_cast_fp16 = layer_norm(axes = x_89_axes_0, epsilon = var_3798_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_169_cast_fp16)[name = string("x_89_cast_fp16")];
tensor<int32, [3]> var_3814 = const()[name = string("op_3814"), val = tensor<int32, [3]>([0, 2, 1])];
string y_45_pad_type_0 = const()[name = string("y_45_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_45_strides_0 = const()[name = string("y_45_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_45_pad_0 = const()[name = string("y_45_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_45_dilations_0 = const()[name = string("y_45_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_45_groups_0 = const()[name = string("y_45_groups_0"), val = int32(1)];
tensor<fp16, [3072, 768, 1]> dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84406656))), scale = tensor<fp16, [3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86766016))))[name = string("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> x_91_cast_fp16 = transpose(perm = var_3814, x = x_89_cast_fp16)[name = string("transpose_145")];
tensor<fp16, [1, 3072, 1]> y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = string("y_45_cast_fp16")];
string x_93_mode_0 = const()[name = string("x_93_mode_0"), val = string("TANH_APPROXIMATION")];
tensor<fp16, [1, 3072, 1]> x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_45_cast_fp16)[name = string("x_93_cast_fp16")];
string y_pad_type_0 = const()[name = string("y_pad_type_0"), val = string("valid")];
tensor<int32, [1]> y_strides_0 = const()[name = string("y_strides_0"), val = tensor<int32, [1]>([1])];
tensor<int32, [2]> y_pad_0 = const()[name = string("y_pad_0"), val = tensor<int32, [2]>([0, 0])];
tensor<int32, [1]> y_dilations_0 = const()[name = string("y_dilations_0"), val = tensor<int32, [1]>([1])];
int32 y_groups_0 = const()[name = string("y_groups_0"), val = int32(1)];
tensor<fp16, [768, 3072, 1]> dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86772224))), scale = tensor<fp16, [768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89131584))))[name = string("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized")];
tensor<fp16, [1, 768, 1]> y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = string("y_cast_fp16")];
tensor<int32, [3]> var_3832 = const()[name = string("op_3832"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 768]> var_3833_cast_fp16 = transpose(perm = var_3832, x = y_cast_fp16)[name = string("transpose_144")];
tensor<fp16, [1, 1, 768]> input_171_cast_fp16 = add(x = input_169_cast_fp16, y = var_3833_cast_fp16)[name = string("input_171_cast_fp16")];
tensor<int32, [1]> input_axes_0 = const()[name = string("input_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [768]> dec_norm_out_weight_to_fp16 = const()[name = string("dec_norm_out_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89133184)))];
fp16 var_3837_to_fp16 = const()[name = string("op_3837_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 768]> input_cast_fp16 = layer_norm(axes = input_axes_0, epsilon = var_3837_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = input_171_cast_fp16)[name = string("input_cast_fp16")];
string input_cast_fp16_to_fp32_dtype_0 = const()[name = string("input_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
tensor<fp16, [16192, 768]> dec_final_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [16192, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89134784))), scale = tensor<fp16, [16192, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101570304))))[name = string("dec_final_proj_weight_to_fp16_quantized")];
tensor<fp16, [16192]> dec_final_proj_bias_to_fp16 = const()[name = string("dec_final_proj_bias_to_fp16"), val = tensor<fp16, [16192]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101602752)))];
tensor<fp16, [1, 1, 16192]> linear_48_cast_fp16 = linear(bias = dec_final_proj_bias_to_fp16, weight = dec_final_proj_weight_to_fp16_quantized, x = input_cast_fp16)[name = string("linear_48_cast_fp16")];
tensor<int32, [4]> var_3850 = const()[name = string("op_3850"), val = tensor<int32, [4]>([1, 1, 8, 2024])];
tensor<fp16, [1, 1, 8, 2024]> var_3851_cast_fp16 = reshape(shape = var_3850, x = linear_48_cast_fp16)[name = string("op_3851_cast_fp16")];
string var_3851_cast_fp16_to_fp32_dtype_0 = const()[name = string("op_3851_cast_fp16_to_fp32_dtype_0"), val = string("fp32")];
tensor<fp32, [1, 1, 768]> h_last = cast(dtype = input_cast_fp16_to_fp32_dtype_0, x = input_cast_fp16)[name = string("cast_1")];
tensor<fp32, [1, 1, 8, 2024]> logits = cast(dtype = var_3851_cast_fp16_to_fp32_dtype_0, x = var_3851_cast_fp16)[name = string("cast_2")];
tensor<fp32, [1, 256, 768]> encoder_output_tmp = identity(x = encoder_output)[name = string("encoder_output_tmp")];
} -> (logits, h_last);
}