program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor decoder_input_ids, tensor encoder_attention_mask, tensor encoder_hidden_states) [FlexibleShapeInformation = tuple, dict, tensor>>, tuple, dict, list, ?>>>>((("DefaultShapes", {{"decoder_input_ids", [1, 8]}, {"encoder_attention_mask", [1, 16]}, {"encoder_hidden_states", [1, 16, 256]}}), ("RangeDims", {{"decoder_input_ids", [[1, 1], [1, 128]]}, {"encoder_attention_mask", [[1, 1], [1, 64]]}, {"encoder_hidden_states", [[1, 1], [1, 64], [256, 256]]}})))] { tensor decoder_embed_tokens_weight = const()[name = tensor("decoder_embed_tokens_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor decoder_block_0_layer_0_layer_norm_weight = const()[name = tensor("decoder_block_0_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(393344)))]; tensor decoder_block_0_layer_0_SelfAttention_q_weight = const()[name = tensor("decoder_block_0_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394432)))]; tensor decoder_block_0_layer_0_SelfAttention_k_weight = const()[name = tensor("decoder_block_0_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(787712)))]; tensor decoder_block_0_layer_0_SelfAttention_v_weight = const()[name = tensor("decoder_block_0_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1180992)))]; tensor decoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight = const()[name = tensor("decoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1574272)))]; tensor decoder_block_0_layer_0_SelfAttention_o_weight = const()[name = tensor("decoder_block_0_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1575104)))]; tensor decoder_block_0_layer_1_layer_norm_weight = const()[name = tensor("decoder_block_0_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1968384)))]; tensor decoder_block_0_layer_1_EncDecAttention_q_weight = const()[name = tensor("decoder_block_0_layer_1_EncDecAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1969472)))]; tensor decoder_block_0_layer_1_EncDecAttention_k_weight = const()[name = tensor("decoder_block_0_layer_1_EncDecAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2362752)))]; tensor decoder_block_0_layer_1_EncDecAttention_v_weight = const()[name = tensor("decoder_block_0_layer_1_EncDecAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2756032)))]; tensor decoder_block_0_layer_1_EncDecAttention_o_weight = const()[name = tensor("decoder_block_0_layer_1_EncDecAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3149312)))]; tensor decoder_block_0_layer_2_layer_norm_weight = const()[name = tensor("decoder_block_0_layer_2_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3542592)))]; tensor decoder_block_0_layer_2_DenseReluDense_wi_0_weight = const()[name = tensor("decoder_block_0_layer_2_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3543680)))]; tensor decoder_block_0_layer_2_DenseReluDense_wi_1_weight = const()[name = tensor("decoder_block_0_layer_2_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4592320)))]; tensor decoder_block_0_layer_2_DenseReluDense_wo_weight = const()[name = tensor("decoder_block_0_layer_2_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5640960)))]; tensor decoder_block_1_layer_0_layer_norm_weight = const()[name = tensor("decoder_block_1_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6689600)))]; tensor decoder_block_1_layer_0_SelfAttention_q_weight = const()[name = tensor("decoder_block_1_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6690688)))]; tensor decoder_block_1_layer_0_SelfAttention_k_weight = const()[name = tensor("decoder_block_1_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7083968)))]; tensor decoder_block_1_layer_0_SelfAttention_v_weight = const()[name = tensor("decoder_block_1_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7477248)))]; tensor decoder_block_1_layer_0_SelfAttention_o_weight = const()[name = tensor("decoder_block_1_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7870528)))]; tensor decoder_block_1_layer_1_layer_norm_weight = const()[name = tensor("decoder_block_1_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8263808)))]; tensor decoder_block_1_layer_1_EncDecAttention_q_weight = const()[name = tensor("decoder_block_1_layer_1_EncDecAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8264896)))]; tensor decoder_block_1_layer_1_EncDecAttention_k_weight = const()[name = tensor("decoder_block_1_layer_1_EncDecAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8658176)))]; tensor decoder_block_1_layer_1_EncDecAttention_v_weight = const()[name = tensor("decoder_block_1_layer_1_EncDecAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9051456)))]; tensor decoder_block_1_layer_1_EncDecAttention_o_weight = const()[name = tensor("decoder_block_1_layer_1_EncDecAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9444736)))]; tensor decoder_block_1_layer_2_layer_norm_weight = const()[name = tensor("decoder_block_1_layer_2_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9838016)))]; tensor decoder_block_1_layer_2_DenseReluDense_wi_0_weight = const()[name = tensor("decoder_block_1_layer_2_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9839104)))]; tensor decoder_block_1_layer_2_DenseReluDense_wi_1_weight = const()[name = tensor("decoder_block_1_layer_2_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10887744)))]; tensor decoder_block_1_layer_2_DenseReluDense_wo_weight = const()[name = tensor("decoder_block_1_layer_2_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11936384)))]; tensor decoder_block_2_layer_0_layer_norm_weight = const()[name = tensor("decoder_block_2_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12985024)))]; tensor decoder_block_2_layer_0_SelfAttention_q_weight = const()[name = tensor("decoder_block_2_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12986112)))]; tensor decoder_block_2_layer_0_SelfAttention_k_weight = const()[name = tensor("decoder_block_2_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13379392)))]; tensor decoder_block_2_layer_0_SelfAttention_v_weight = const()[name = tensor("decoder_block_2_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13772672)))]; tensor decoder_block_2_layer_0_SelfAttention_o_weight = const()[name = tensor("decoder_block_2_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14165952)))]; tensor decoder_block_2_layer_1_layer_norm_weight = const()[name = tensor("decoder_block_2_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14559232)))]; tensor decoder_block_2_layer_1_EncDecAttention_q_weight = const()[name = tensor("decoder_block_2_layer_1_EncDecAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14560320)))]; tensor decoder_block_2_layer_1_EncDecAttention_k_weight = const()[name = tensor("decoder_block_2_layer_1_EncDecAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14953600)))]; tensor decoder_block_2_layer_1_EncDecAttention_v_weight = const()[name = tensor("decoder_block_2_layer_1_EncDecAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15346880)))]; tensor decoder_block_2_layer_1_EncDecAttention_o_weight = const()[name = tensor("decoder_block_2_layer_1_EncDecAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15740160)))]; tensor decoder_block_2_layer_2_layer_norm_weight = const()[name = tensor("decoder_block_2_layer_2_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16133440)))]; tensor decoder_block_2_layer_2_DenseReluDense_wi_0_weight = const()[name = tensor("decoder_block_2_layer_2_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16134528)))]; tensor decoder_block_2_layer_2_DenseReluDense_wi_1_weight = const()[name = tensor("decoder_block_2_layer_2_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17183168)))]; tensor decoder_block_2_layer_2_DenseReluDense_wo_weight = const()[name = tensor("decoder_block_2_layer_2_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18231808)))]; tensor decoder_block_3_layer_0_layer_norm_weight = const()[name = tensor("decoder_block_3_layer_0_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19280448)))]; tensor decoder_block_3_layer_0_SelfAttention_q_weight = const()[name = tensor("decoder_block_3_layer_0_SelfAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19281536)))]; tensor decoder_block_3_layer_0_SelfAttention_k_weight = const()[name = tensor("decoder_block_3_layer_0_SelfAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19674816)))]; tensor decoder_block_3_layer_0_SelfAttention_v_weight = const()[name = tensor("decoder_block_3_layer_0_SelfAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20068096)))]; tensor decoder_block_3_layer_0_SelfAttention_o_weight = const()[name = tensor("decoder_block_3_layer_0_SelfAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20461376)))]; tensor decoder_block_3_layer_1_layer_norm_weight = const()[name = tensor("decoder_block_3_layer_1_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20854656)))]; tensor decoder_block_3_layer_1_EncDecAttention_q_weight = const()[name = tensor("decoder_block_3_layer_1_EncDecAttention_q_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20855744)))]; tensor decoder_block_3_layer_1_EncDecAttention_k_weight = const()[name = tensor("decoder_block_3_layer_1_EncDecAttention_k_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21249024)))]; tensor decoder_block_3_layer_1_EncDecAttention_v_weight = const()[name = tensor("decoder_block_3_layer_1_EncDecAttention_v_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21642304)))]; tensor decoder_block_3_layer_1_EncDecAttention_o_weight = const()[name = tensor("decoder_block_3_layer_1_EncDecAttention_o_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22035584)))]; tensor decoder_block_3_layer_2_layer_norm_weight = const()[name = tensor("decoder_block_3_layer_2_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22428864)))]; tensor decoder_block_3_layer_2_DenseReluDense_wi_0_weight = const()[name = tensor("decoder_block_3_layer_2_DenseReluDense_wi_0_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22429952)))]; tensor decoder_block_3_layer_2_DenseReluDense_wi_1_weight = const()[name = tensor("decoder_block_3_layer_2_DenseReluDense_wi_1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23478592)))]; tensor decoder_block_3_layer_2_DenseReluDense_wo_weight = const()[name = tensor("decoder_block_3_layer_2_DenseReluDense_wo_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24527232)))]; tensor decoder_final_layer_norm_weight = const()[name = tensor("decoder_final_layer_norm_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25575872)))]; tensor lm_head_weight = const()[name = tensor("lm_head_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25576960)))]; tensor var_8 = const()[name = tensor("op_8"), val = tensor(16)]; tensor var_14 = const()[name = tensor("op_14"), val = tensor(0x1p+0)]; tensor var_16 = const()[name = tensor("op_16"), val = tensor(6)]; tensor var_22 = const()[name = tensor("op_22"), val = tensor(-1)]; tensor var_23 = const()[name = tensor("op_23"), val = tensor(1)]; tensor var_35_shape = shape(x = decoder_input_ids)[name = tensor("op_35_shape")]; tensor gather_0 = const()[name = tensor("gather_0"), val = tensor(1)]; tensor gather_1_batch_dims_0 = const()[name = tensor("gather_1_batch_dims_0"), val = tensor(0)]; tensor gather_1_validate_indices_0 = const()[name = tensor("gather_1_validate_indices_0"), val = tensor(false)]; tensor select_0 = const()[name = tensor("select_0"), val = tensor(1)]; tensor gather_1_axis_1 = const()[name = tensor("gather_1_axis_1"), val = tensor(0)]; tensor gather_1 = gather(axis = gather_1_axis_1, batch_dims = gather_1_batch_dims_0, indices = select_0, validate_indices = gather_1_validate_indices_0, x = var_35_shape)[name = tensor("gather_1")]; tensor input_3_batch_dims_0 = const()[name = tensor("input_3_batch_dims_0"), val = tensor(0)]; tensor input_3_validate_indices_0 = const()[name = tensor("input_3_validate_indices_0"), val = tensor(false)]; tensor greater_equal_1_y_0 = const()[name = tensor("greater_equal_1_y_0"), val = tensor(0)]; tensor greater_equal_1 = greater_equal(x = decoder_input_ids, y = greater_equal_1_y_0)[name = tensor("greater_equal_1")]; tensor slice_by_index_1 = const()[name = tensor("slice_by_index_1"), val = tensor(384)]; tensor add_1 = add(x = decoder_input_ids, y = slice_by_index_1)[name = tensor("add_1")]; tensor select_1 = select(a = decoder_input_ids, b = add_1, cond = greater_equal_1)[name = tensor("select_1")]; tensor input_3_axis_1 = const()[name = tensor("input_3_axis_1"), val = tensor(0)]; tensor input_3 = gather(axis = input_3_axis_1, batch_dims = input_3_batch_dims_0, indices = select_1, validate_indices = input_3_validate_indices_0, x = decoder_embed_tokens_weight)[name = tensor("input_3")]; tensor concat_1_axis_0 = const()[name = tensor("concat_1_axis_0"), val = tensor(0)]; tensor concat_1_interleave_0 = const()[name = tensor("concat_1_interleave_0"), val = tensor(false)]; tensor concat_1 = concat(axis = concat_1_axis_0, interleave = concat_1_interleave_0, values = (gather_0, gather_1))[name = tensor("concat_1")]; tensor fill_0_value_0 = const()[name = tensor("fill_0_value_0"), val = tensor(0x1p+0)]; tensor fill_0 = fill(shape = concat_1, value = fill_0_value_0)[name = tensor("fill_0")]; tensor const_0 = const()[name = tensor("const_0"), val = tensor(0)]; tensor const_1 = const()[name = tensor("const_1"), val = tensor(1)]; tensor seq_ids = range_1d(end = gather_1, start = const_0, step = const_1)[name = tensor("seq_ids")]; tensor var_44_axes_0 = const()[name = tensor("op_44_axes_0"), val = tensor([0])]; tensor var_44 = expand_dims(axes = var_44_axes_0, x = seq_ids)[name = tensor("op_44")]; tensor var_45_axes_0 = const()[name = tensor("op_45_axes_0"), val = tensor([1])]; tensor var_45 = expand_dims(axes = var_45_axes_0, x = var_44)[name = tensor("op_45")]; tensor concat_2_axis_0 = const()[name = tensor("concat_2_axis_0"), val = tensor(0)]; tensor concat_2_interleave_0 = const()[name = tensor("concat_2_interleave_0"), val = tensor(false)]; tensor concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (gather_0, gather_1, var_23))[name = tensor("concat_2")]; tensor var_48 = tile(reps = concat_2, x = var_45)[name = tensor("op_48")]; tensor var_51_axes_0 = const()[name = tensor("op_51_axes_0"), val = tensor([2])]; tensor var_51 = expand_dims(axes = var_51_axes_0, x = var_44)[name = tensor("op_51")]; tensor causal_mask_1 = less_equal(x = var_48, y = var_51)[name = tensor("causal_mask_1")]; tensor causal_mask_dtype_0 = const()[name = tensor("causal_mask_dtype_0"), val = tensor("fp32")]; tensor var_55_axes_0 = const()[name = tensor("op_55_axes_0"), val = tensor([1])]; tensor causal_mask = cast(dtype = causal_mask_dtype_0, x = causal_mask_1)[name = tensor("cast_42")]; tensor var_55 = expand_dims(axes = var_55_axes_0, x = causal_mask)[name = tensor("op_55")]; tensor var_59_axes_0 = const()[name = tensor("op_59_axes_0"), val = tensor([1])]; tensor var_59 = expand_dims(axes = var_59_axes_0, x = fill_0)[name = tensor("op_59")]; tensor var_60_axes_0 = const()[name = tensor("op_60_axes_0"), val = tensor([2])]; tensor var_60 = expand_dims(axes = var_60_axes_0, x = var_59)[name = tensor("op_60")]; tensor extended_attention_mask = mul(x = var_55, y = var_60)[name = tensor("extended_attention_mask")]; tensor var_64 = sub(x = var_14, y = extended_attention_mask)[name = tensor("op_64")]; tensor var_65 = const()[name = tensor("op_65"), val = tensor(-0x1.fffffep+127)]; tensor mask_1 = mul(x = var_64, y = var_65)[name = tensor("mask_1")]; tensor var_68_axes_0 = const()[name = tensor("op_68_axes_0"), val = tensor([1])]; tensor var_68 = expand_dims(axes = var_68_axes_0, x = encoder_attention_mask)[name = tensor("op_68")]; tensor var_69_axes_0 = const()[name = tensor("op_69_axes_0"), val = tensor([2])]; tensor var_69 = expand_dims(axes = var_69_axes_0, x = var_68)[name = tensor("op_69")]; tensor var_71_dtype_0 = const()[name = tensor("op_71_dtype_0"), val = tensor("fp32")]; tensor var_71 = cast(dtype = var_71_dtype_0, x = var_69)[name = tensor("cast_41")]; tensor var_72 = sub(x = var_14, y = var_71)[name = tensor("op_72")]; tensor var_73 = const()[name = tensor("op_73"), val = tensor(-0x1.fffffep+127)]; tensor mask = mul(x = var_72, y = var_73)[name = tensor("mask")]; tensor var_18_promoted = const()[name = tensor("op_18_promoted"), val = tensor(0x1p+1)]; tensor var_86 = pow(x = input_3, y = var_18_promoted)[name = tensor("op_86")]; tensor variance_1_axes_0 = const()[name = tensor("variance_1_axes_0"), val = tensor([-1])]; tensor variance_1_keep_dims_0 = const()[name = tensor("variance_1_keep_dims_0"), val = tensor(true)]; tensor variance_1 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_86)[name = tensor("variance_1")]; tensor var_89 = const()[name = tensor("op_89"), val = tensor(0x1.0c6f7ap-20)]; tensor var_90 = add(x = variance_1, y = var_89)[name = tensor("op_90")]; tensor var_91_epsilon_0 = const()[name = tensor("op_91_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_91 = rsqrt(epsilon = var_91_epsilon_0, x = var_90)[name = tensor("op_91")]; tensor hidden_states_5 = mul(x = input_3, y = var_91)[name = tensor("hidden_states_5")]; tensor hidden_states_7 = mul(x = decoder_block_0_layer_0_layer_norm_weight, y = hidden_states_5)[name = tensor("hidden_states_7")]; tensor var_103_shape = shape(x = hidden_states_7)[name = tensor("op_103_shape")]; tensor gather_3_batch_dims_0 = const()[name = tensor("gather_3_batch_dims_0"), val = tensor(0)]; tensor gather_3_validate_indices_0 = const()[name = tensor("gather_3_validate_indices_0"), val = tensor(false)]; tensor select_2 = const()[name = tensor("select_2"), val = tensor(1)]; tensor gather_3_axis_1 = const()[name = tensor("gather_3_axis_1"), val = tensor(0)]; tensor gather_3 = gather(axis = gather_3_axis_1, batch_dims = gather_3_batch_dims_0, indices = select_2, validate_indices = gather_3_validate_indices_0, x = var_103_shape)[name = tensor("gather_3")]; tensor linear_0_bias_0 = const()[name = tensor("linear_0_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25970240)))]; tensor states_1 = linear(bias = linear_0_bias_0, weight = decoder_block_0_layer_0_SelfAttention_q_weight, x = hidden_states_7)[name = tensor("linear_0")]; tensor var_106 = const()[name = tensor("op_106"), val = tensor([1, -1, 6, 64])]; tensor var_107 = reshape(shape = var_106, x = states_1)[name = tensor("op_107")]; tensor states_3 = linear(bias = linear_0_bias_0, weight = decoder_block_0_layer_0_SelfAttention_k_weight, x = hidden_states_7)[name = tensor("linear_1")]; tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, -1, 6, 64])]; tensor var_112 = reshape(shape = var_111, x = states_3)[name = tensor("op_112")]; tensor states_5 = linear(bias = linear_0_bias_0, weight = decoder_block_0_layer_0_SelfAttention_v_weight, x = hidden_states_7)[name = tensor("linear_2")]; tensor var_116 = const()[name = tensor("op_116"), val = tensor([1, -1, 6, 64])]; tensor var_117 = reshape(shape = var_116, x = states_5)[name = tensor("op_117")]; tensor value_states_1_perm_0 = const()[name = tensor("value_states_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_1_transpose_x_0 = const()[name = tensor("scores_1_transpose_x_0"), val = tensor(false)]; tensor scores_1_transpose_y_0 = const()[name = tensor("scores_1_transpose_y_0"), val = tensor(false)]; tensor transpose_24_perm_0 = const()[name = tensor("transpose_24_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_25_perm_0 = const()[name = tensor("transpose_25_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_25 = transpose(perm = transpose_25_perm_0, x = var_112)[name = tensor("transpose_70")]; tensor transpose_24 = transpose(perm = transpose_24_perm_0, x = var_107)[name = tensor("transpose_71")]; tensor scores_1 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_24, y = transpose_25)[name = tensor("scores_1")]; tensor const_2 = const()[name = tensor("const_2"), val = tensor(0)]; tensor const_3 = const()[name = tensor("const_3"), val = tensor(1)]; tensor var_121 = range_1d(end = gather_3, start = const_2, step = const_3)[name = tensor("op_121")]; tensor context_position_axes_0 = const()[name = tensor("context_position_axes_0"), val = tensor([1])]; tensor context_position = expand_dims(axes = context_position_axes_0, x = var_121)[name = tensor("context_position")]; tensor var_125_axes_0 = const()[name = tensor("op_125_axes_0"), val = tensor([0])]; tensor var_125 = expand_dims(axes = var_125_axes_0, x = var_121)[name = tensor("op_125")]; tensor relative_position_1 = sub(x = var_125, y = context_position)[name = tensor("relative_position_1")]; tensor var_128 = sub(x = relative_position_1, y = relative_position_1)[name = tensor("sub_0")]; tensor var_129 = minimum(x = relative_position_1, y = var_128)[name = tensor("op_129")]; tensor const_6 = const()[name = tensor("const_6"), val = tensor(-1)]; tensor relative_position = mul(x = var_129, y = const_6)[name = tensor("relative_position")]; tensor is_small = less(x = relative_position, y = var_8)[name = tensor("is_small")]; tensor var_132_dtype_0 = const()[name = tensor("op_132_dtype_0"), val = tensor("fp32")]; tensor _inversed_134_y_0 = const()[name = tensor("_inversed_134_y_0"), val = tensor(0x1p-4)]; tensor var_132 = cast(dtype = var_132_dtype_0, x = relative_position)[name = tensor("cast_40")]; tensor _inversed_134 = mul(x = var_132, y = _inversed_134_y_0)[name = tensor("_inversed_134")]; tensor var_135_epsilon_0 = const()[name = tensor("op_135_epsilon_0"), val = tensor(0x1p-149)]; tensor var_135 = log(epsilon = var_135_epsilon_0, x = _inversed_134)[name = tensor("op_135")]; tensor _inversed_137_y_0 = const()[name = tensor("_inversed_137_y_0"), val = tensor(0x1.ec709ep-2)]; tensor _inversed_137 = mul(x = var_135, y = _inversed_137_y_0)[name = tensor("_inversed_137")]; tensor var_138_promoted = const()[name = tensor("op_138_promoted"), val = tensor(0x1p+4)]; tensor var_139 = mul(x = _inversed_137, y = var_138_promoted)[name = tensor("op_139")]; tensor var_140_dtype_0 = const()[name = tensor("op_140_dtype_0"), val = tensor("int32")]; tensor var_141 = const()[name = tensor("op_141"), val = tensor(16)]; tensor var_140 = cast(dtype = var_140_dtype_0, x = var_139)[name = tensor("cast_39")]; tensor relative_position_if_large_1 = add(x = var_140, y = var_141)[name = tensor("relative_position_if_large_1")]; tensor var_143_value_0 = const()[name = tensor("op_143_value_0"), val = tensor(31)]; tensor var_143 = fill_like(ref_tensor = relative_position_if_large_1, value = var_143_value_0)[name = tensor("op_143")]; tensor relative_position_if_large = minimum(x = relative_position_if_large_1, y = var_143)[name = tensor("relative_position_if_large")]; tensor var_145 = select(a = relative_position, b = relative_position_if_large, cond = is_small)[name = tensor("op_145")]; tensor values_batch_dims_0 = const()[name = tensor("values_batch_dims_0"), val = tensor(0)]; tensor values_validate_indices_0 = const()[name = tensor("values_validate_indices_0"), val = tensor(false)]; tensor greater_equal_3_y_0 = const()[name = tensor("greater_equal_3_y_0"), val = tensor(0)]; tensor greater_equal_3 = greater_equal(x = var_145, y = greater_equal_3_y_0)[name = tensor("greater_equal_3")]; tensor slice_by_index_3 = const()[name = tensor("slice_by_index_3"), val = tensor(32)]; tensor add_3 = add(x = var_145, y = slice_by_index_3)[name = tensor("add_3")]; tensor select_3 = select(a = var_145, b = add_3, cond = greater_equal_3)[name = tensor("select_3")]; tensor values_axis_1 = const()[name = tensor("values_axis_1"), val = tensor(0)]; tensor values = gather(axis = values_axis_1, batch_dims = values_batch_dims_0, indices = select_3, validate_indices = values_validate_indices_0, x = decoder_block_0_layer_0_SelfAttention_relative_attention_bias_weight)[name = tensor("values")]; tensor var_150 = const()[name = tensor("op_150"), val = tensor([2, 0, 1])]; tensor position_bias_1_axes_0 = const()[name = tensor("position_bias_1_axes_0"), val = tensor([0])]; tensor var_151 = transpose(perm = var_150, x = values)[name = tensor("transpose_69")]; tensor position_bias_1 = expand_dims(axes = position_bias_1_axes_0, x = var_151)[name = tensor("position_bias_1")]; tensor position_bias_3 = add(x = position_bias_1, y = mask_1)[name = tensor("position_bias_3")]; tensor scores_3 = add(x = scores_1, y = position_bias_3)[name = tensor("scores_3")]; tensor var_156 = softmax(axis = var_22, x = scores_3)[name = tensor("op_156")]; tensor states_7_transpose_x_0 = const()[name = tensor("states_7_transpose_x_0"), val = tensor(false)]; tensor states_7_transpose_y_0 = const()[name = tensor("states_7_transpose_y_0"), val = tensor(false)]; tensor value_states_1 = transpose(perm = value_states_1_perm_0, x = var_117)[name = tensor("transpose_72")]; tensor states_7 = matmul(transpose_x = states_7_transpose_x_0, transpose_y = states_7_transpose_y_0, x = var_156, y = value_states_1)[name = tensor("states_7")]; tensor var_160_perm_0 = const()[name = tensor("op_160_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_162 = const()[name = tensor("op_162"), val = tensor([1, -1, 384])]; tensor var_160 = transpose(perm = var_160_perm_0, x = states_7)[name = tensor("transpose_68")]; tensor input_11 = reshape(shape = var_162, x = var_160)[name = tensor("input_11")]; tensor linear_3_bias_0 = const()[name = tensor("linear_3_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25971840)))]; tensor input_13 = linear(bias = linear_3_bias_0, weight = decoder_block_0_layer_0_SelfAttention_o_weight, x = input_11)[name = tensor("linear_3")]; tensor hidden_states_9 = add(x = input_3, y = input_13)[name = tensor("hidden_states_9")]; tensor var_18_promoted_1 = const()[name = tensor("op_18_promoted_1"), val = tensor(0x1p+1)]; tensor var_180 = pow(x = hidden_states_9, y = var_18_promoted_1)[name = tensor("op_180")]; tensor variance_3_axes_0 = const()[name = tensor("variance_3_axes_0"), val = tensor([-1])]; tensor variance_3_keep_dims_0 = const()[name = tensor("variance_3_keep_dims_0"), val = tensor(true)]; tensor variance_3 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_180)[name = tensor("variance_3")]; tensor var_183 = const()[name = tensor("op_183"), val = tensor(0x1.0c6f7ap-20)]; tensor var_184 = add(x = variance_3, y = var_183)[name = tensor("op_184")]; tensor var_185_epsilon_0 = const()[name = tensor("op_185_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_185 = rsqrt(epsilon = var_185_epsilon_0, x = var_184)[name = tensor("op_185")]; tensor hidden_states_13 = mul(x = hidden_states_9, y = var_185)[name = tensor("hidden_states_13")]; tensor hidden_states_15 = mul(x = decoder_block_0_layer_1_layer_norm_weight, y = hidden_states_13)[name = tensor("hidden_states_15")]; tensor var_196_shape = shape(x = hidden_states_15)[name = tensor("op_196_shape")]; tensor gather_5_batch_dims_0 = const()[name = tensor("gather_5_batch_dims_0"), val = tensor(0)]; tensor gather_5_validate_indices_0 = const()[name = tensor("gather_5_validate_indices_0"), val = tensor(false)]; tensor select_4 = const()[name = tensor("select_4"), val = tensor(1)]; tensor gather_5_axis_1 = const()[name = tensor("gather_5_axis_1"), val = tensor(0)]; tensor gather_5 = gather(axis = gather_5_axis_1, batch_dims = gather_5_batch_dims_0, indices = select_4, validate_indices = gather_5_validate_indices_0, x = var_196_shape)[name = tensor("gather_5")]; tensor var_197_shape = shape(x = encoder_hidden_states)[name = tensor("op_197_shape")]; tensor gather_6_batch_dims_0 = const()[name = tensor("gather_6_batch_dims_0"), val = tensor(0)]; tensor gather_6_validate_indices_0 = const()[name = tensor("gather_6_validate_indices_0"), val = tensor(false)]; tensor select_5 = const()[name = tensor("select_5"), val = tensor(1)]; tensor gather_6_axis_1 = const()[name = tensor("gather_6_axis_1"), val = tensor(0)]; tensor gather_6 = gather(axis = gather_6_axis_1, batch_dims = gather_6_batch_dims_0, indices = select_5, validate_indices = gather_6_validate_indices_0, x = var_197_shape)[name = tensor("gather_6")]; tensor states_9 = linear(bias = linear_0_bias_0, weight = decoder_block_0_layer_1_EncDecAttention_q_weight, x = hidden_states_15)[name = tensor("linear_4")]; tensor var_200 = const()[name = tensor("op_200"), val = tensor([1, -1, 6, 64])]; tensor var_201 = reshape(shape = var_200, x = states_9)[name = tensor("op_201")]; tensor states_11 = linear(bias = linear_0_bias_0, weight = decoder_block_0_layer_1_EncDecAttention_k_weight, x = encoder_hidden_states)[name = tensor("linear_5")]; tensor var_205 = const()[name = tensor("op_205"), val = tensor([1, -1, 6, 64])]; tensor var_206 = reshape(shape = var_205, x = states_11)[name = tensor("op_206")]; tensor states_13 = linear(bias = linear_0_bias_0, weight = decoder_block_0_layer_1_EncDecAttention_v_weight, x = encoder_hidden_states)[name = tensor("linear_6")]; tensor var_210 = const()[name = tensor("op_210"), val = tensor([1, -1, 6, 64])]; tensor var_211 = reshape(shape = var_210, x = states_13)[name = tensor("op_211")]; tensor value_states_3_perm_0 = const()[name = tensor("value_states_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_5_transpose_x_0 = const()[name = tensor("scores_5_transpose_x_0"), val = tensor(false)]; tensor scores_5_transpose_y_0 = const()[name = tensor("scores_5_transpose_y_0"), val = tensor(false)]; tensor transpose_26_perm_0 = const()[name = tensor("transpose_26_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_27_perm_0 = const()[name = tensor("transpose_27_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_27 = transpose(perm = transpose_27_perm_0, x = var_206)[name = tensor("transpose_65")]; tensor transpose_26 = transpose(perm = transpose_26_perm_0, x = var_201)[name = tensor("transpose_66")]; tensor scores_5 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_26, y = transpose_27)[name = tensor("scores_5")]; tensor concat_3_axis_0 = const()[name = tensor("concat_3_axis_0"), val = tensor(0)]; tensor concat_3_interleave_0 = const()[name = tensor("concat_3_interleave_0"), val = tensor(false)]; tensor concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (var_23, var_16, gather_5, gather_6))[name = tensor("concat_3")]; tensor position_bias_5_value_0 = const()[name = tensor("position_bias_5_value_0"), val = tensor(0x0p+0)]; tensor position_bias_5 = fill(shape = concat_3, value = position_bias_5_value_0)[name = tensor("position_bias_5")]; tensor position_bias = add(x = position_bias_5, y = mask)[name = tensor("position_bias")]; tensor scores_7 = add(x = scores_5, y = position_bias)[name = tensor("scores_7")]; tensor var_220 = softmax(axis = var_22, x = scores_7)[name = tensor("op_220")]; tensor states_15_transpose_x_0 = const()[name = tensor("states_15_transpose_x_0"), val = tensor(false)]; tensor states_15_transpose_y_0 = const()[name = tensor("states_15_transpose_y_0"), val = tensor(false)]; tensor value_states_3 = transpose(perm = value_states_3_perm_0, x = var_211)[name = tensor("transpose_67")]; tensor states_15 = matmul(transpose_x = states_15_transpose_x_0, transpose_y = states_15_transpose_y_0, x = var_220, y = value_states_3)[name = tensor("states_15")]; tensor var_224_perm_0 = const()[name = tensor("op_224_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_226 = const()[name = tensor("op_226"), val = tensor([1, -1, 384])]; tensor var_224 = transpose(perm = var_224_perm_0, x = states_15)[name = tensor("transpose_64")]; tensor input_19 = reshape(shape = var_226, x = var_224)[name = tensor("input_19")]; tensor input_21 = linear(bias = linear_3_bias_0, weight = decoder_block_0_layer_1_EncDecAttention_o_weight, x = input_19)[name = tensor("linear_7")]; tensor hidden_states_17 = add(x = hidden_states_9, y = input_21)[name = tensor("hidden_states_17")]; tensor var_18_promoted_2 = const()[name = tensor("op_18_promoted_2"), val = tensor(0x1p+1)]; tensor var_242 = pow(x = hidden_states_17, y = var_18_promoted_2)[name = tensor("op_242")]; tensor variance_5_axes_0 = const()[name = tensor("variance_5_axes_0"), val = tensor([-1])]; tensor variance_5_keep_dims_0 = const()[name = tensor("variance_5_keep_dims_0"), val = tensor(true)]; tensor variance_5 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_242)[name = tensor("variance_5")]; tensor var_245 = const()[name = tensor("op_245"), val = tensor(0x1.0c6f7ap-20)]; tensor var_246 = add(x = variance_5, y = var_245)[name = tensor("op_246")]; tensor var_247_epsilon_0 = const()[name = tensor("op_247_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_247 = rsqrt(epsilon = var_247_epsilon_0, x = var_246)[name = tensor("op_247")]; tensor hidden_states_21 = mul(x = hidden_states_17, y = var_247)[name = tensor("hidden_states_21")]; tensor input_23 = mul(x = decoder_block_0_layer_2_layer_norm_weight, y = hidden_states_21)[name = tensor("input_23")]; tensor linear_8_bias_0 = const()[name = tensor("linear_8_bias_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25972928)))]; tensor input_25 = linear(bias = linear_8_bias_0, weight = decoder_block_0_layer_2_DenseReluDense_wi_0_weight, x = input_23)[name = tensor("linear_8")]; tensor hidden_gelu_1_mode_0 = const()[name = tensor("hidden_gelu_1_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_1 = gelu(mode = hidden_gelu_1_mode_0, x = input_25)[name = tensor("hidden_gelu_1")]; tensor hidden_linear_1 = linear(bias = linear_8_bias_0, weight = decoder_block_0_layer_2_DenseReluDense_wi_1_weight, x = input_23)[name = tensor("linear_9")]; tensor input_27 = mul(x = hidden_gelu_1, y = hidden_linear_1)[name = tensor("input_27")]; tensor input_31 = linear(bias = linear_3_bias_0, weight = decoder_block_0_layer_2_DenseReluDense_wo_weight, x = input_27)[name = tensor("linear_10")]; tensor hidden_states_23 = add(x = hidden_states_17, y = input_31)[name = tensor("hidden_states_23")]; tensor var_18_promoted_3 = const()[name = tensor("op_18_promoted_3"), val = tensor(0x1p+1)]; tensor var_292 = pow(x = hidden_states_23, y = var_18_promoted_3)[name = tensor("op_292")]; tensor variance_7_axes_0 = const()[name = tensor("variance_7_axes_0"), val = tensor([-1])]; tensor variance_7_keep_dims_0 = const()[name = tensor("variance_7_keep_dims_0"), val = tensor(true)]; tensor variance_7 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_292)[name = tensor("variance_7")]; tensor var_295 = const()[name = tensor("op_295"), val = tensor(0x1.0c6f7ap-20)]; tensor var_296 = add(x = variance_7, y = var_295)[name = tensor("op_296")]; tensor var_297_epsilon_0 = const()[name = tensor("op_297_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_297 = rsqrt(epsilon = var_297_epsilon_0, x = var_296)[name = tensor("op_297")]; tensor hidden_states_27 = mul(x = hidden_states_23, y = var_297)[name = tensor("hidden_states_27")]; tensor hidden_states_29 = mul(x = decoder_block_1_layer_0_layer_norm_weight, y = hidden_states_27)[name = tensor("hidden_states_29")]; tensor states_17 = linear(bias = linear_0_bias_0, weight = decoder_block_1_layer_0_SelfAttention_q_weight, x = hidden_states_29)[name = tensor("linear_11")]; tensor var_310 = const()[name = tensor("op_310"), val = tensor([1, -1, 6, 64])]; tensor var_311 = reshape(shape = var_310, x = states_17)[name = tensor("op_311")]; tensor states_19 = linear(bias = linear_0_bias_0, weight = decoder_block_1_layer_0_SelfAttention_k_weight, x = hidden_states_29)[name = tensor("linear_12")]; tensor var_315 = const()[name = tensor("op_315"), val = tensor([1, -1, 6, 64])]; tensor var_316 = reshape(shape = var_315, x = states_19)[name = tensor("op_316")]; tensor states_21 = linear(bias = linear_0_bias_0, weight = decoder_block_1_layer_0_SelfAttention_v_weight, x = hidden_states_29)[name = tensor("linear_13")]; tensor var_320 = const()[name = tensor("op_320"), val = tensor([1, -1, 6, 64])]; tensor var_321 = reshape(shape = var_320, x = states_21)[name = tensor("op_321")]; tensor value_states_5_perm_0 = const()[name = tensor("value_states_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_9_transpose_x_0 = const()[name = tensor("scores_9_transpose_x_0"), val = tensor(false)]; tensor scores_9_transpose_y_0 = const()[name = tensor("scores_9_transpose_y_0"), val = tensor(false)]; tensor transpose_28_perm_0 = const()[name = tensor("transpose_28_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_29_perm_0 = const()[name = tensor("transpose_29_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_29 = transpose(perm = transpose_29_perm_0, x = var_316)[name = tensor("transpose_61")]; tensor transpose_28 = transpose(perm = transpose_28_perm_0, x = var_311)[name = tensor("transpose_62")]; tensor scores_9 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_28, y = transpose_29)[name = tensor("scores_9")]; tensor scores_11 = add(x = scores_9, y = position_bias_3)[name = tensor("scores_11")]; tensor var_327 = softmax(axis = var_22, x = scores_11)[name = tensor("op_327")]; tensor states_23_transpose_x_0 = const()[name = tensor("states_23_transpose_x_0"), val = tensor(false)]; tensor states_23_transpose_y_0 = const()[name = tensor("states_23_transpose_y_0"), val = tensor(false)]; tensor value_states_5 = transpose(perm = value_states_5_perm_0, x = var_321)[name = tensor("transpose_63")]; tensor states_23 = matmul(transpose_x = states_23_transpose_x_0, transpose_y = states_23_transpose_y_0, x = var_327, y = value_states_5)[name = tensor("states_23")]; tensor var_331_perm_0 = const()[name = tensor("op_331_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_333 = const()[name = tensor("op_333"), val = tensor([1, -1, 384])]; tensor var_331 = transpose(perm = var_331_perm_0, x = states_23)[name = tensor("transpose_60")]; tensor input_37 = reshape(shape = var_333, x = var_331)[name = tensor("input_37")]; tensor input_39 = linear(bias = linear_3_bias_0, weight = decoder_block_1_layer_0_SelfAttention_o_weight, x = input_37)[name = tensor("linear_14")]; tensor hidden_states_31 = add(x = hidden_states_23, y = input_39)[name = tensor("hidden_states_31")]; tensor var_18_promoted_4 = const()[name = tensor("op_18_promoted_4"), val = tensor(0x1p+1)]; tensor var_349 = pow(x = hidden_states_31, y = var_18_promoted_4)[name = tensor("op_349")]; tensor variance_9_axes_0 = const()[name = tensor("variance_9_axes_0"), val = tensor([-1])]; tensor variance_9_keep_dims_0 = const()[name = tensor("variance_9_keep_dims_0"), val = tensor(true)]; tensor variance_9 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_349)[name = tensor("variance_9")]; tensor var_352 = const()[name = tensor("op_352"), val = tensor(0x1.0c6f7ap-20)]; tensor var_353 = add(x = variance_9, y = var_352)[name = tensor("op_353")]; tensor var_354_epsilon_0 = const()[name = tensor("op_354_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_354 = rsqrt(epsilon = var_354_epsilon_0, x = var_353)[name = tensor("op_354")]; tensor hidden_states_35 = mul(x = hidden_states_31, y = var_354)[name = tensor("hidden_states_35")]; tensor hidden_states_37 = mul(x = decoder_block_1_layer_1_layer_norm_weight, y = hidden_states_35)[name = tensor("hidden_states_37")]; tensor states_25 = linear(bias = linear_0_bias_0, weight = decoder_block_1_layer_1_EncDecAttention_q_weight, x = hidden_states_37)[name = tensor("linear_15")]; tensor var_367 = const()[name = tensor("op_367"), val = tensor([1, -1, 6, 64])]; tensor var_368 = reshape(shape = var_367, x = states_25)[name = tensor("op_368")]; tensor states_27 = linear(bias = linear_0_bias_0, weight = decoder_block_1_layer_1_EncDecAttention_k_weight, x = encoder_hidden_states)[name = tensor("linear_16")]; tensor var_372 = const()[name = tensor("op_372"), val = tensor([1, -1, 6, 64])]; tensor var_373 = reshape(shape = var_372, x = states_27)[name = tensor("op_373")]; tensor states_29 = linear(bias = linear_0_bias_0, weight = decoder_block_1_layer_1_EncDecAttention_v_weight, x = encoder_hidden_states)[name = tensor("linear_17")]; tensor var_377 = const()[name = tensor("op_377"), val = tensor([1, -1, 6, 64])]; tensor var_378 = reshape(shape = var_377, x = states_29)[name = tensor("op_378")]; tensor value_states_7_perm_0 = const()[name = tensor("value_states_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_13_transpose_x_0 = const()[name = tensor("scores_13_transpose_x_0"), val = tensor(false)]; tensor scores_13_transpose_y_0 = const()[name = tensor("scores_13_transpose_y_0"), val = tensor(false)]; tensor transpose_30_perm_0 = const()[name = tensor("transpose_30_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_31_perm_0 = const()[name = tensor("transpose_31_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_31 = transpose(perm = transpose_31_perm_0, x = var_373)[name = tensor("transpose_57")]; tensor transpose_30 = transpose(perm = transpose_30_perm_0, x = var_368)[name = tensor("transpose_58")]; tensor scores_13 = matmul(transpose_x = scores_13_transpose_x_0, transpose_y = scores_13_transpose_y_0, x = transpose_30, y = transpose_31)[name = tensor("scores_13")]; tensor scores_15 = add(x = scores_13, y = position_bias)[name = tensor("scores_15")]; tensor var_384 = softmax(axis = var_22, x = scores_15)[name = tensor("op_384")]; tensor states_31_transpose_x_0 = const()[name = tensor("states_31_transpose_x_0"), val = tensor(false)]; tensor states_31_transpose_y_0 = const()[name = tensor("states_31_transpose_y_0"), val = tensor(false)]; tensor value_states_7 = transpose(perm = value_states_7_perm_0, x = var_378)[name = tensor("transpose_59")]; tensor states_31 = matmul(transpose_x = states_31_transpose_x_0, transpose_y = states_31_transpose_y_0, x = var_384, y = value_states_7)[name = tensor("states_31")]; tensor var_388_perm_0 = const()[name = tensor("op_388_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_390 = const()[name = tensor("op_390"), val = tensor([1, -1, 384])]; tensor var_388 = transpose(perm = var_388_perm_0, x = states_31)[name = tensor("transpose_56")]; tensor input_45 = reshape(shape = var_390, x = var_388)[name = tensor("input_45")]; tensor input_47 = linear(bias = linear_3_bias_0, weight = decoder_block_1_layer_1_EncDecAttention_o_weight, x = input_45)[name = tensor("linear_18")]; tensor hidden_states_39 = add(x = hidden_states_31, y = input_47)[name = tensor("hidden_states_39")]; tensor var_18_promoted_5 = const()[name = tensor("op_18_promoted_5"), val = tensor(0x1p+1)]; tensor var_400 = pow(x = hidden_states_39, y = var_18_promoted_5)[name = tensor("op_400")]; tensor variance_11_axes_0 = const()[name = tensor("variance_11_axes_0"), val = tensor([-1])]; tensor variance_11_keep_dims_0 = const()[name = tensor("variance_11_keep_dims_0"), val = tensor(true)]; tensor variance_11 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_400)[name = tensor("variance_11")]; tensor var_403 = const()[name = tensor("op_403"), val = tensor(0x1.0c6f7ap-20)]; tensor var_404 = add(x = variance_11, y = var_403)[name = tensor("op_404")]; tensor var_405_epsilon_0 = const()[name = tensor("op_405_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_405 = rsqrt(epsilon = var_405_epsilon_0, x = var_404)[name = tensor("op_405")]; tensor hidden_states_43 = mul(x = hidden_states_39, y = var_405)[name = tensor("hidden_states_43")]; tensor input_49 = mul(x = decoder_block_1_layer_2_layer_norm_weight, y = hidden_states_43)[name = tensor("input_49")]; tensor input_51 = linear(bias = linear_8_bias_0, weight = decoder_block_1_layer_2_DenseReluDense_wi_0_weight, x = input_49)[name = tensor("linear_19")]; tensor hidden_gelu_3_mode_0 = const()[name = tensor("hidden_gelu_3_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_3 = gelu(mode = hidden_gelu_3_mode_0, x = input_51)[name = tensor("hidden_gelu_3")]; tensor hidden_linear_3 = linear(bias = linear_8_bias_0, weight = decoder_block_1_layer_2_DenseReluDense_wi_1_weight, x = input_49)[name = tensor("linear_20")]; tensor input_53 = mul(x = hidden_gelu_3, y = hidden_linear_3)[name = tensor("input_53")]; tensor input_57 = linear(bias = linear_3_bias_0, weight = decoder_block_1_layer_2_DenseReluDense_wo_weight, x = input_53)[name = tensor("linear_21")]; tensor hidden_states_45 = add(x = hidden_states_39, y = input_57)[name = tensor("hidden_states_45")]; tensor var_18_promoted_6 = const()[name = tensor("op_18_promoted_6"), val = tensor(0x1p+1)]; tensor var_446 = pow(x = hidden_states_45, y = var_18_promoted_6)[name = tensor("op_446")]; tensor variance_13_axes_0 = const()[name = tensor("variance_13_axes_0"), val = tensor([-1])]; tensor variance_13_keep_dims_0 = const()[name = tensor("variance_13_keep_dims_0"), val = tensor(true)]; tensor variance_13 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_446)[name = tensor("variance_13")]; tensor var_449 = const()[name = tensor("op_449"), val = tensor(0x1.0c6f7ap-20)]; tensor var_450 = add(x = variance_13, y = var_449)[name = tensor("op_450")]; tensor var_451_epsilon_0 = const()[name = tensor("op_451_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_451 = rsqrt(epsilon = var_451_epsilon_0, x = var_450)[name = tensor("op_451")]; tensor hidden_states_49 = mul(x = hidden_states_45, y = var_451)[name = tensor("hidden_states_49")]; tensor hidden_states_51 = mul(x = decoder_block_2_layer_0_layer_norm_weight, y = hidden_states_49)[name = tensor("hidden_states_51")]; tensor states_33 = linear(bias = linear_0_bias_0, weight = decoder_block_2_layer_0_SelfAttention_q_weight, x = hidden_states_51)[name = tensor("linear_22")]; tensor var_464 = const()[name = tensor("op_464"), val = tensor([1, -1, 6, 64])]; tensor var_465 = reshape(shape = var_464, x = states_33)[name = tensor("op_465")]; tensor states_35 = linear(bias = linear_0_bias_0, weight = decoder_block_2_layer_0_SelfAttention_k_weight, x = hidden_states_51)[name = tensor("linear_23")]; tensor var_469 = const()[name = tensor("op_469"), val = tensor([1, -1, 6, 64])]; tensor var_470 = reshape(shape = var_469, x = states_35)[name = tensor("op_470")]; tensor states_37 = linear(bias = linear_0_bias_0, weight = decoder_block_2_layer_0_SelfAttention_v_weight, x = hidden_states_51)[name = tensor("linear_24")]; tensor var_474 = const()[name = tensor("op_474"), val = tensor([1, -1, 6, 64])]; tensor var_475 = reshape(shape = var_474, x = states_37)[name = tensor("op_475")]; tensor value_states_9_perm_0 = const()[name = tensor("value_states_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_17_transpose_x_0 = const()[name = tensor("scores_17_transpose_x_0"), val = tensor(false)]; tensor scores_17_transpose_y_0 = const()[name = tensor("scores_17_transpose_y_0"), val = tensor(false)]; tensor transpose_32_perm_0 = const()[name = tensor("transpose_32_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_33_perm_0 = const()[name = tensor("transpose_33_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_33 = transpose(perm = transpose_33_perm_0, x = var_470)[name = tensor("transpose_53")]; tensor transpose_32 = transpose(perm = transpose_32_perm_0, x = var_465)[name = tensor("transpose_54")]; tensor scores_17 = matmul(transpose_x = scores_17_transpose_x_0, transpose_y = scores_17_transpose_y_0, x = transpose_32, y = transpose_33)[name = tensor("scores_17")]; tensor scores_19 = add(x = scores_17, y = position_bias_3)[name = tensor("scores_19")]; tensor var_481 = softmax(axis = var_22, x = scores_19)[name = tensor("op_481")]; tensor states_39_transpose_x_0 = const()[name = tensor("states_39_transpose_x_0"), val = tensor(false)]; tensor states_39_transpose_y_0 = const()[name = tensor("states_39_transpose_y_0"), val = tensor(false)]; tensor value_states_9 = transpose(perm = value_states_9_perm_0, x = var_475)[name = tensor("transpose_55")]; tensor states_39 = matmul(transpose_x = states_39_transpose_x_0, transpose_y = states_39_transpose_y_0, x = var_481, y = value_states_9)[name = tensor("states_39")]; tensor var_485_perm_0 = const()[name = tensor("op_485_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_487 = const()[name = tensor("op_487"), val = tensor([1, -1, 384])]; tensor var_485 = transpose(perm = var_485_perm_0, x = states_39)[name = tensor("transpose_52")]; tensor input_63 = reshape(shape = var_487, x = var_485)[name = tensor("input_63")]; tensor input_65 = linear(bias = linear_3_bias_0, weight = decoder_block_2_layer_0_SelfAttention_o_weight, x = input_63)[name = tensor("linear_25")]; tensor hidden_states_53 = add(x = hidden_states_45, y = input_65)[name = tensor("hidden_states_53")]; tensor var_18_promoted_7 = const()[name = tensor("op_18_promoted_7"), val = tensor(0x1p+1)]; tensor var_503 = pow(x = hidden_states_53, y = var_18_promoted_7)[name = tensor("op_503")]; tensor variance_15_axes_0 = const()[name = tensor("variance_15_axes_0"), val = tensor([-1])]; tensor variance_15_keep_dims_0 = const()[name = tensor("variance_15_keep_dims_0"), val = tensor(true)]; tensor variance_15 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_503)[name = tensor("variance_15")]; tensor var_506 = const()[name = tensor("op_506"), val = tensor(0x1.0c6f7ap-20)]; tensor var_507 = add(x = variance_15, y = var_506)[name = tensor("op_507")]; tensor var_508_epsilon_0 = const()[name = tensor("op_508_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_508 = rsqrt(epsilon = var_508_epsilon_0, x = var_507)[name = tensor("op_508")]; tensor hidden_states_57 = mul(x = hidden_states_53, y = var_508)[name = tensor("hidden_states_57")]; tensor hidden_states_59 = mul(x = decoder_block_2_layer_1_layer_norm_weight, y = hidden_states_57)[name = tensor("hidden_states_59")]; tensor states_41 = linear(bias = linear_0_bias_0, weight = decoder_block_2_layer_1_EncDecAttention_q_weight, x = hidden_states_59)[name = tensor("linear_26")]; tensor var_521 = const()[name = tensor("op_521"), val = tensor([1, -1, 6, 64])]; tensor var_522 = reshape(shape = var_521, x = states_41)[name = tensor("op_522")]; tensor states_43 = linear(bias = linear_0_bias_0, weight = decoder_block_2_layer_1_EncDecAttention_k_weight, x = encoder_hidden_states)[name = tensor("linear_27")]; tensor var_526 = const()[name = tensor("op_526"), val = tensor([1, -1, 6, 64])]; tensor var_527 = reshape(shape = var_526, x = states_43)[name = tensor("op_527")]; tensor states_45 = linear(bias = linear_0_bias_0, weight = decoder_block_2_layer_1_EncDecAttention_v_weight, x = encoder_hidden_states)[name = tensor("linear_28")]; tensor var_531 = const()[name = tensor("op_531"), val = tensor([1, -1, 6, 64])]; tensor var_532 = reshape(shape = var_531, x = states_45)[name = tensor("op_532")]; tensor value_states_11_perm_0 = const()[name = tensor("value_states_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_21_transpose_x_0 = const()[name = tensor("scores_21_transpose_x_0"), val = tensor(false)]; tensor scores_21_transpose_y_0 = const()[name = tensor("scores_21_transpose_y_0"), val = tensor(false)]; tensor transpose_34_perm_0 = const()[name = tensor("transpose_34_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_35_perm_0 = const()[name = tensor("transpose_35_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_35 = transpose(perm = transpose_35_perm_0, x = var_527)[name = tensor("transpose_49")]; tensor transpose_34 = transpose(perm = transpose_34_perm_0, x = var_522)[name = tensor("transpose_50")]; tensor scores_21 = matmul(transpose_x = scores_21_transpose_x_0, transpose_y = scores_21_transpose_y_0, x = transpose_34, y = transpose_35)[name = tensor("scores_21")]; tensor scores_23 = add(x = scores_21, y = position_bias)[name = tensor("scores_23")]; tensor var_538 = softmax(axis = var_22, x = scores_23)[name = tensor("op_538")]; tensor states_47_transpose_x_0 = const()[name = tensor("states_47_transpose_x_0"), val = tensor(false)]; tensor states_47_transpose_y_0 = const()[name = tensor("states_47_transpose_y_0"), val = tensor(false)]; tensor value_states_11 = transpose(perm = value_states_11_perm_0, x = var_532)[name = tensor("transpose_51")]; tensor states_47 = matmul(transpose_x = states_47_transpose_x_0, transpose_y = states_47_transpose_y_0, x = var_538, y = value_states_11)[name = tensor("states_47")]; tensor var_542_perm_0 = const()[name = tensor("op_542_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_544 = const()[name = tensor("op_544"), val = tensor([1, -1, 384])]; tensor var_542 = transpose(perm = var_542_perm_0, x = states_47)[name = tensor("transpose_48")]; tensor input_71 = reshape(shape = var_544, x = var_542)[name = tensor("input_71")]; tensor input_73 = linear(bias = linear_3_bias_0, weight = decoder_block_2_layer_1_EncDecAttention_o_weight, x = input_71)[name = tensor("linear_29")]; tensor hidden_states_61 = add(x = hidden_states_53, y = input_73)[name = tensor("hidden_states_61")]; tensor var_18_promoted_8 = const()[name = tensor("op_18_promoted_8"), val = tensor(0x1p+1)]; tensor var_554 = pow(x = hidden_states_61, y = var_18_promoted_8)[name = tensor("op_554")]; tensor variance_17_axes_0 = const()[name = tensor("variance_17_axes_0"), val = tensor([-1])]; tensor variance_17_keep_dims_0 = const()[name = tensor("variance_17_keep_dims_0"), val = tensor(true)]; tensor variance_17 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_554)[name = tensor("variance_17")]; tensor var_557 = const()[name = tensor("op_557"), val = tensor(0x1.0c6f7ap-20)]; tensor var_558 = add(x = variance_17, y = var_557)[name = tensor("op_558")]; tensor var_559_epsilon_0 = const()[name = tensor("op_559_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_559 = rsqrt(epsilon = var_559_epsilon_0, x = var_558)[name = tensor("op_559")]; tensor hidden_states_65 = mul(x = hidden_states_61, y = var_559)[name = tensor("hidden_states_65")]; tensor input_75 = mul(x = decoder_block_2_layer_2_layer_norm_weight, y = hidden_states_65)[name = tensor("input_75")]; tensor input_77 = linear(bias = linear_8_bias_0, weight = decoder_block_2_layer_2_DenseReluDense_wi_0_weight, x = input_75)[name = tensor("linear_30")]; tensor hidden_gelu_5_mode_0 = const()[name = tensor("hidden_gelu_5_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu_5 = gelu(mode = hidden_gelu_5_mode_0, x = input_77)[name = tensor("hidden_gelu_5")]; tensor hidden_linear_5 = linear(bias = linear_8_bias_0, weight = decoder_block_2_layer_2_DenseReluDense_wi_1_weight, x = input_75)[name = tensor("linear_31")]; tensor input_79 = mul(x = hidden_gelu_5, y = hidden_linear_5)[name = tensor("input_79")]; tensor input_83 = linear(bias = linear_3_bias_0, weight = decoder_block_2_layer_2_DenseReluDense_wo_weight, x = input_79)[name = tensor("linear_32")]; tensor hidden_states_67 = add(x = hidden_states_61, y = input_83)[name = tensor("hidden_states_67")]; tensor var_18_promoted_9 = const()[name = tensor("op_18_promoted_9"), val = tensor(0x1p+1)]; tensor var_600 = pow(x = hidden_states_67, y = var_18_promoted_9)[name = tensor("op_600")]; tensor variance_19_axes_0 = const()[name = tensor("variance_19_axes_0"), val = tensor([-1])]; tensor variance_19_keep_dims_0 = const()[name = tensor("variance_19_keep_dims_0"), val = tensor(true)]; tensor variance_19 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_600)[name = tensor("variance_19")]; tensor var_603 = const()[name = tensor("op_603"), val = tensor(0x1.0c6f7ap-20)]; tensor var_604 = add(x = variance_19, y = var_603)[name = tensor("op_604")]; tensor var_605_epsilon_0 = const()[name = tensor("op_605_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_605 = rsqrt(epsilon = var_605_epsilon_0, x = var_604)[name = tensor("op_605")]; tensor hidden_states_71 = mul(x = hidden_states_67, y = var_605)[name = tensor("hidden_states_71")]; tensor hidden_states_73 = mul(x = decoder_block_3_layer_0_layer_norm_weight, y = hidden_states_71)[name = tensor("hidden_states_73")]; tensor states_49 = linear(bias = linear_0_bias_0, weight = decoder_block_3_layer_0_SelfAttention_q_weight, x = hidden_states_73)[name = tensor("linear_33")]; tensor var_618 = const()[name = tensor("op_618"), val = tensor([1, -1, 6, 64])]; tensor var_619 = reshape(shape = var_618, x = states_49)[name = tensor("op_619")]; tensor states_51 = linear(bias = linear_0_bias_0, weight = decoder_block_3_layer_0_SelfAttention_k_weight, x = hidden_states_73)[name = tensor("linear_34")]; tensor var_623 = const()[name = tensor("op_623"), val = tensor([1, -1, 6, 64])]; tensor var_624 = reshape(shape = var_623, x = states_51)[name = tensor("op_624")]; tensor states_53 = linear(bias = linear_0_bias_0, weight = decoder_block_3_layer_0_SelfAttention_v_weight, x = hidden_states_73)[name = tensor("linear_35")]; tensor var_628 = const()[name = tensor("op_628"), val = tensor([1, -1, 6, 64])]; tensor var_629 = reshape(shape = var_628, x = states_53)[name = tensor("op_629")]; tensor value_states_13_perm_0 = const()[name = tensor("value_states_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_25_transpose_x_0 = const()[name = tensor("scores_25_transpose_x_0"), val = tensor(false)]; tensor scores_25_transpose_y_0 = const()[name = tensor("scores_25_transpose_y_0"), val = tensor(false)]; tensor transpose_36_perm_0 = const()[name = tensor("transpose_36_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_37_perm_0 = const()[name = tensor("transpose_37_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_37 = transpose(perm = transpose_37_perm_0, x = var_624)[name = tensor("transpose_45")]; tensor transpose_36 = transpose(perm = transpose_36_perm_0, x = var_619)[name = tensor("transpose_46")]; tensor scores_25 = matmul(transpose_x = scores_25_transpose_x_0, transpose_y = scores_25_transpose_y_0, x = transpose_36, y = transpose_37)[name = tensor("scores_25")]; tensor scores_27 = add(x = scores_25, y = position_bias_3)[name = tensor("scores_27")]; tensor var_635 = softmax(axis = var_22, x = scores_27)[name = tensor("op_635")]; tensor states_55_transpose_x_0 = const()[name = tensor("states_55_transpose_x_0"), val = tensor(false)]; tensor states_55_transpose_y_0 = const()[name = tensor("states_55_transpose_y_0"), val = tensor(false)]; tensor value_states_13 = transpose(perm = value_states_13_perm_0, x = var_629)[name = tensor("transpose_47")]; tensor states_55 = matmul(transpose_x = states_55_transpose_x_0, transpose_y = states_55_transpose_y_0, x = var_635, y = value_states_13)[name = tensor("states_55")]; tensor var_639_perm_0 = const()[name = tensor("op_639_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_641 = const()[name = tensor("op_641"), val = tensor([1, -1, 384])]; tensor var_639 = transpose(perm = var_639_perm_0, x = states_55)[name = tensor("transpose_44")]; tensor input_89 = reshape(shape = var_641, x = var_639)[name = tensor("input_89")]; tensor input_91 = linear(bias = linear_3_bias_0, weight = decoder_block_3_layer_0_SelfAttention_o_weight, x = input_89)[name = tensor("linear_36")]; tensor hidden_states_75 = add(x = hidden_states_67, y = input_91)[name = tensor("hidden_states_75")]; tensor var_18_promoted_10 = const()[name = tensor("op_18_promoted_10"), val = tensor(0x1p+1)]; tensor var_657 = pow(x = hidden_states_75, y = var_18_promoted_10)[name = tensor("op_657")]; tensor variance_21_axes_0 = const()[name = tensor("variance_21_axes_0"), val = tensor([-1])]; tensor variance_21_keep_dims_0 = const()[name = tensor("variance_21_keep_dims_0"), val = tensor(true)]; tensor variance_21 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_657)[name = tensor("variance_21")]; tensor var_660 = const()[name = tensor("op_660"), val = tensor(0x1.0c6f7ap-20)]; tensor var_661 = add(x = variance_21, y = var_660)[name = tensor("op_661")]; tensor var_662_epsilon_0 = const()[name = tensor("op_662_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_662 = rsqrt(epsilon = var_662_epsilon_0, x = var_661)[name = tensor("op_662")]; tensor hidden_states_79 = mul(x = hidden_states_75, y = var_662)[name = tensor("hidden_states_79")]; tensor hidden_states_81 = mul(x = decoder_block_3_layer_1_layer_norm_weight, y = hidden_states_79)[name = tensor("hidden_states_81")]; tensor states_57 = linear(bias = linear_0_bias_0, weight = decoder_block_3_layer_1_EncDecAttention_q_weight, x = hidden_states_81)[name = tensor("linear_37")]; tensor var_675 = const()[name = tensor("op_675"), val = tensor([1, -1, 6, 64])]; tensor var_676 = reshape(shape = var_675, x = states_57)[name = tensor("op_676")]; tensor states_59 = linear(bias = linear_0_bias_0, weight = decoder_block_3_layer_1_EncDecAttention_k_weight, x = encoder_hidden_states)[name = tensor("linear_38")]; tensor var_680 = const()[name = tensor("op_680"), val = tensor([1, -1, 6, 64])]; tensor var_681 = reshape(shape = var_680, x = states_59)[name = tensor("op_681")]; tensor states_61 = linear(bias = linear_0_bias_0, weight = decoder_block_3_layer_1_EncDecAttention_v_weight, x = encoder_hidden_states)[name = tensor("linear_39")]; tensor var_685 = const()[name = tensor("op_685"), val = tensor([1, -1, 6, 64])]; tensor var_686 = reshape(shape = var_685, x = states_61)[name = tensor("op_686")]; tensor value_states_perm_0 = const()[name = tensor("value_states_perm_0"), val = tensor([0, 2, 1, 3])]; tensor scores_29_transpose_x_0 = const()[name = tensor("scores_29_transpose_x_0"), val = tensor(false)]; tensor scores_29_transpose_y_0 = const()[name = tensor("scores_29_transpose_y_0"), val = tensor(false)]; tensor transpose_38_perm_0 = const()[name = tensor("transpose_38_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_39_perm_0 = const()[name = tensor("transpose_39_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_39 = transpose(perm = transpose_39_perm_0, x = var_681)[name = tensor("transpose_41")]; tensor transpose_38 = transpose(perm = transpose_38_perm_0, x = var_676)[name = tensor("transpose_42")]; tensor scores_29 = matmul(transpose_x = scores_29_transpose_x_0, transpose_y = scores_29_transpose_y_0, x = transpose_38, y = transpose_39)[name = tensor("scores_29")]; tensor scores = add(x = scores_29, y = position_bias)[name = tensor("scores")]; tensor var_692 = softmax(axis = var_22, x = scores)[name = tensor("op_692")]; tensor states_transpose_x_0 = const()[name = tensor("states_transpose_x_0"), val = tensor(false)]; tensor states_transpose_y_0 = const()[name = tensor("states_transpose_y_0"), val = tensor(false)]; tensor value_states = transpose(perm = value_states_perm_0, x = var_686)[name = tensor("transpose_43")]; tensor states = matmul(transpose_x = states_transpose_x_0, transpose_y = states_transpose_y_0, x = var_692, y = value_states)[name = tensor("states")]; tensor var_696_perm_0 = const()[name = tensor("op_696_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_698 = const()[name = tensor("op_698"), val = tensor([1, -1, 384])]; tensor var_696 = transpose(perm = var_696_perm_0, x = states)[name = tensor("transpose_40")]; tensor input_97 = reshape(shape = var_698, x = var_696)[name = tensor("input_97")]; tensor input_99 = linear(bias = linear_3_bias_0, weight = decoder_block_3_layer_1_EncDecAttention_o_weight, x = input_97)[name = tensor("linear_40")]; tensor hidden_states_83 = add(x = hidden_states_75, y = input_99)[name = tensor("hidden_states_83")]; tensor var_18_promoted_11 = const()[name = tensor("op_18_promoted_11"), val = tensor(0x1p+1)]; tensor var_708 = pow(x = hidden_states_83, y = var_18_promoted_11)[name = tensor("op_708")]; tensor variance_23_axes_0 = const()[name = tensor("variance_23_axes_0"), val = tensor([-1])]; tensor variance_23_keep_dims_0 = const()[name = tensor("variance_23_keep_dims_0"), val = tensor(true)]; tensor variance_23 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_708)[name = tensor("variance_23")]; tensor var_711 = const()[name = tensor("op_711"), val = tensor(0x1.0c6f7ap-20)]; tensor var_712 = add(x = variance_23, y = var_711)[name = tensor("op_712")]; tensor var_713_epsilon_0 = const()[name = tensor("op_713_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_713 = rsqrt(epsilon = var_713_epsilon_0, x = var_712)[name = tensor("op_713")]; tensor hidden_states_87 = mul(x = hidden_states_83, y = var_713)[name = tensor("hidden_states_87")]; tensor input_101 = mul(x = decoder_block_3_layer_2_layer_norm_weight, y = hidden_states_87)[name = tensor("input_101")]; tensor input_103 = linear(bias = linear_8_bias_0, weight = decoder_block_3_layer_2_DenseReluDense_wi_0_weight, x = input_101)[name = tensor("linear_41")]; tensor hidden_gelu_mode_0 = const()[name = tensor("hidden_gelu_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor hidden_gelu = gelu(mode = hidden_gelu_mode_0, x = input_103)[name = tensor("hidden_gelu")]; tensor hidden_linear = linear(bias = linear_8_bias_0, weight = decoder_block_3_layer_2_DenseReluDense_wi_1_weight, x = input_101)[name = tensor("linear_42")]; tensor input_105 = mul(x = hidden_gelu, y = hidden_linear)[name = tensor("input_105")]; tensor input_109 = linear(bias = linear_3_bias_0, weight = decoder_block_3_layer_2_DenseReluDense_wo_weight, x = input_105)[name = tensor("linear_43")]; tensor hidden_states_89 = add(x = hidden_states_83, y = input_109)[name = tensor("hidden_states_89")]; tensor var_18_promoted_12 = const()[name = tensor("op_18_promoted_12"), val = tensor(0x1p+1)]; tensor var_746 = pow(x = hidden_states_89, y = var_18_promoted_12)[name = tensor("op_746")]; tensor variance_axes_0 = const()[name = tensor("variance_axes_0"), val = tensor([-1])]; tensor variance_keep_dims_0 = const()[name = tensor("variance_keep_dims_0"), val = tensor(true)]; tensor variance = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_746)[name = tensor("variance")]; tensor var_749 = const()[name = tensor("op_749"), val = tensor(0x1.0c6f7ap-20)]; tensor var_750 = add(x = variance, y = var_749)[name = tensor("op_750")]; tensor var_751_epsilon_0 = const()[name = tensor("op_751_epsilon_0"), val = tensor(0x1.197998p-40)]; tensor var_751 = rsqrt(epsilon = var_751_epsilon_0, x = var_750)[name = tensor("op_751")]; tensor hidden_states = mul(x = hidden_states_89, y = var_751)[name = tensor("hidden_states")]; tensor input_111 = mul(x = decoder_final_layer_norm_weight, y = hidden_states)[name = tensor("input_111")]; tensor logits = linear(bias = linear_0_bias_0, weight = lm_head_weight, x = input_111)[name = tensor("linear_44")]; } -> (logits); }