program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})] { func main(tensor causal_mask, tensor decoder_input_ids, tensor encoder_hidden_states, tensor position_ids) [FlexibleShapeInformation = tuple, dict, tensor>>, tuple, dict, list, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1]}, {"decoder_input_ids", [1, 1]}, {"encoder_hidden_states", [1, 1, 128]}, {"position_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 64], [1, 64]]}, {"decoder_input_ids", [[1, 1], [1, 64]]}, {"encoder_hidden_states", [[1, 1], [1, 64], [128, 128]]}, {"position_ids", [[1, 1], [1, 64]]}})))] { tensor var_27_axis_0 = const()[name = tensor("op_27_axis_0"), val = tensor(0)]; tensor var_27_batch_dims_0 = const()[name = tensor("op_27_batch_dims_0"), val = tensor(0)]; tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor var_27_cast_fp16 = gather(axis = var_27_axis_0, batch_dims = var_27_batch_dims_0, indices = decoder_input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_27_cast_fp16")]; tensor var_33_axis_0 = const()[name = tensor("op_33_axis_0"), val = tensor(0)]; tensor var_33_batch_dims_0 = const()[name = tensor("op_33_batch_dims_0"), val = tensor(0)]; tensor embed_positions_weight_to_fp16 = const()[name = tensor("embed_positions_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16256)))]; tensor var_33_cast_fp16 = gather(axis = var_33_axis_0, batch_dims = var_33_batch_dims_0, indices = position_ids, x = embed_positions_weight_to_fp16)[name = tensor("op_33_cast_fp16")]; tensor input_1_cast_fp16 = add(x = var_27_cast_fp16, y = var_33_cast_fp16)[name = tensor("input_1_cast_fp16")]; tensor input_3_axes_0 = const()[name = tensor("input_3_axes_0"), val = tensor([-1])]; tensor layernorm_embedding_weight_to_fp16 = const()[name = tensor("layernorm_embedding_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33216)))]; tensor layernorm_embedding_bias_to_fp16 = const()[name = tensor("layernorm_embedding_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33536)))]; tensor var_37_to_fp16 = const()[name = tensor("op_37_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = layernorm_embedding_bias_to_fp16, epsilon = var_37_to_fp16, gamma = layernorm_embedding_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor self_attn_q_weight_to_fp16 = const()[name = tensor("self_attn_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33856)))]; tensor self_attn_q_bias_to_fp16 = const()[name = tensor("self_attn_q_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66688)))]; tensor linear_0_cast_fp16 = linear(bias = self_attn_q_bias_to_fp16, weight = self_attn_q_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("linear_0_cast_fp16")]; tensor self_attn_k_weight_to_fp16 = const()[name = tensor("self_attn_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67008)))]; tensor self_attn_k_bias_to_fp16 = const()[name = tensor("self_attn_k_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99840)))]; tensor linear_1_cast_fp16 = linear(bias = self_attn_k_bias_to_fp16, weight = self_attn_k_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor self_attn_v_weight_to_fp16 = const()[name = tensor("self_attn_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100160)))]; tensor self_attn_v_bias_to_fp16 = const()[name = tensor("self_attn_v_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132992)))]; tensor linear_2_cast_fp16 = linear(bias = self_attn_v_bias_to_fp16, weight = self_attn_v_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_55_transpose_x_1 = const()[name = tensor("op_55_transpose_x_1"), val = tensor(false)]; tensor var_55_transpose_y_1 = const()[name = tensor("op_55_transpose_y_1"), val = tensor(true)]; tensor var_55_cast_fp16 = matmul(transpose_x = var_55_transpose_x_1, transpose_y = var_55_transpose_y_1, x = linear_0_cast_fp16, y = linear_1_cast_fp16)[name = tensor("op_55_cast_fp16")]; tensor var_56_to_fp16 = const()[name = tensor("op_56_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_weights_1_cast_fp16 = mul(x = var_55_cast_fp16, y = var_56_to_fp16)[name = tensor("attn_weights_1_cast_fp16")]; tensor causal_mask_to_fp16_dtype_0 = const()[name = tensor("causal_mask_to_fp16_dtype_0"), val = tensor("fp16")]; tensor causal_mask_to_fp16 = cast(dtype = causal_mask_to_fp16_dtype_0, x = causal_mask)[name = tensor("cast_4")]; tensor attn_weights_3_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask_to_fp16)[name = tensor("attn_weights_3_cast_fp16")]; tensor var_60 = const()[name = tensor("op_60"), val = tensor(-1)]; tensor attn_weights_5_cast_fp16 = softmax(axis = var_60, x = attn_weights_3_cast_fp16)[name = tensor("attn_weights_5_cast_fp16")]; tensor input_5_transpose_x_0 = const()[name = tensor("input_5_transpose_x_0"), val = tensor(false)]; tensor input_5_transpose_y_0 = const()[name = tensor("input_5_transpose_y_0"), val = tensor(false)]; tensor input_5_cast_fp16 = matmul(transpose_x = input_5_transpose_x_0, transpose_y = input_5_transpose_y_0, x = attn_weights_5_cast_fp16, y = linear_2_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor self_attn_out_weight_to_fp16 = const()[name = tensor("self_attn_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133312)))]; tensor self_attn_out_bias_to_fp16 = const()[name = tensor("self_attn_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166144)))]; tensor linear_3_cast_fp16 = linear(bias = self_attn_out_bias_to_fp16, weight = self_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor input_7_cast_fp16 = add(x = input_3_cast_fp16, y = linear_3_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor input_9_axes_0 = const()[name = tensor("input_9_axes_0"), val = tensor([-1])]; tensor self_attn_norm_weight_to_fp16 = const()[name = tensor("self_attn_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166464)))]; tensor self_attn_norm_bias_to_fp16 = const()[name = tensor("self_attn_norm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166784)))]; tensor var_70_to_fp16 = const()[name = tensor("op_70_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_9_cast_fp16 = layer_norm(axes = input_9_axes_0, beta = self_attn_norm_bias_to_fp16, epsilon = var_70_to_fp16, gamma = self_attn_norm_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor cross_attn_q_weight_to_fp16 = const()[name = tensor("cross_attn_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167104)))]; tensor cross_attn_q_bias_to_fp16 = const()[name = tensor("cross_attn_q_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199936)))]; tensor linear_4_cast_fp16 = linear(bias = cross_attn_q_bias_to_fp16, weight = cross_attn_q_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor encoder_hidden_states_to_fp16_dtype_0 = const()[name = tensor("encoder_hidden_states_to_fp16_dtype_0"), val = tensor("fp16")]; tensor cross_attn_k_weight_to_fp16 = const()[name = tensor("cross_attn_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200256)))]; tensor cross_attn_k_bias_to_fp16 = const()[name = tensor("cross_attn_k_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233088)))]; tensor encoder_hidden_states_to_fp16 = cast(dtype = encoder_hidden_states_to_fp16_dtype_0, x = encoder_hidden_states)[name = tensor("cast_3")]; tensor linear_5_cast_fp16 = linear(bias = cross_attn_k_bias_to_fp16, weight = cross_attn_k_weight_to_fp16, x = encoder_hidden_states_to_fp16)[name = tensor("linear_5_cast_fp16")]; tensor cross_attn_v_weight_to_fp16 = const()[name = tensor("cross_attn_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233408)))]; tensor cross_attn_v_bias_to_fp16 = const()[name = tensor("cross_attn_v_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266240)))]; tensor linear_6_cast_fp16 = linear(bias = cross_attn_v_bias_to_fp16, weight = cross_attn_v_weight_to_fp16, x = encoder_hidden_states_to_fp16)[name = tensor("linear_6_cast_fp16")]; tensor var_88_transpose_x_1 = const()[name = tensor("op_88_transpose_x_1"), val = tensor(false)]; tensor var_88_transpose_y_1 = const()[name = tensor("op_88_transpose_y_1"), val = tensor(true)]; tensor var_88_cast_fp16 = matmul(transpose_x = var_88_transpose_x_1, transpose_y = var_88_transpose_y_1, x = linear_4_cast_fp16, y = linear_5_cast_fp16)[name = tensor("op_88_cast_fp16")]; tensor var_89_to_fp16 = const()[name = tensor("op_89_to_fp16"), val = tensor(0x1.6ap-4)]; tensor attn_weights_7_cast_fp16 = mul(x = var_88_cast_fp16, y = var_89_to_fp16)[name = tensor("attn_weights_7_cast_fp16")]; tensor var_91 = const()[name = tensor("op_91"), val = tensor(-1)]; tensor attn_weights_cast_fp16 = softmax(axis = var_91, x = attn_weights_7_cast_fp16)[name = tensor("attn_weights_cast_fp16")]; tensor input_11_transpose_x_0 = const()[name = tensor("input_11_transpose_x_0"), val = tensor(false)]; tensor input_11_transpose_y_0 = const()[name = tensor("input_11_transpose_y_0"), val = tensor(false)]; tensor input_11_cast_fp16 = matmul(transpose_x = input_11_transpose_x_0, transpose_y = input_11_transpose_y_0, x = attn_weights_cast_fp16, y = linear_6_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor cross_attn_out_weight_to_fp16 = const()[name = tensor("cross_attn_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266560)))]; tensor cross_attn_out_bias_to_fp16 = const()[name = tensor("cross_attn_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299392)))]; tensor linear_7_cast_fp16 = linear(bias = cross_attn_out_bias_to_fp16, weight = cross_attn_out_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor input_13_cast_fp16 = add(x = input_9_cast_fp16, y = linear_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_axes_0 = const()[name = tensor("input_15_axes_0"), val = tensor([-1])]; tensor cross_attn_norm_weight_to_fp16 = const()[name = tensor("cross_attn_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299712)))]; tensor cross_attn_norm_bias_to_fp16 = const()[name = tensor("cross_attn_norm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300032)))]; tensor var_101_to_fp16 = const()[name = tensor("op_101_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = cross_attn_norm_bias_to_fp16, epsilon = var_101_to_fp16, gamma = cross_attn_norm_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor fc1_weight_to_fp16 = const()[name = tensor("fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300352)))]; tensor fc1_bias_to_fp16 = const()[name = tensor("fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562560)))]; tensor linear_8_cast_fp16 = linear(bias = fc1_bias_to_fp16, weight = fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor input_17_mode_0 = const()[name = tensor("input_17_mode_0"), val = tensor("EXACT")]; tensor input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_8_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor fc2_weight_to_fp16 = const()[name = tensor("fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564672)))]; tensor fc2_bias_to_fp16 = const()[name = tensor("fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(826880)))]; tensor linear_9_cast_fp16 = linear(bias = fc2_bias_to_fp16, weight = fc2_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor input_19_cast_fp16 = add(x = input_15_cast_fp16, y = linear_9_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_axes_0 = const()[name = tensor("input_axes_0"), val = tensor([-1])]; tensor final_layer_norm_weight_to_fp16 = const()[name = tensor("final_layer_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827200)))]; tensor final_layer_norm_bias_to_fp16 = const()[name = tensor("final_layer_norm_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827520)))]; tensor var_118_to_fp16 = const()[name = tensor("op_118_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_cast_fp16 = layer_norm(axes = input_axes_0, beta = final_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = final_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_cast_fp16")]; tensor linear_10_bias_0_to_fp16 = const()[name = tensor("linear_10_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827840)))]; tensor logits = linear(bias = linear_10_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = input_cast_fp16)[name = tensor("linear_10_cast_fp16")]; } -> (logits); }