program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor attention_mask, tensor input_embeds, state> k_cache_0, state> k_cache_1, state> k_cache_10, state> k_cache_11, state> k_cache_12, state> k_cache_13, state> k_cache_14, state> k_cache_15, state> k_cache_16, state> k_cache_17, state> k_cache_18, state> k_cache_19, state> k_cache_2, state> k_cache_20, state> k_cache_21, state> k_cache_22, state> k_cache_23, state> k_cache_24, state> k_cache_25, state> k_cache_26, state> k_cache_27, state> k_cache_3, state> k_cache_4, state> k_cache_5, state> k_cache_6, state> k_cache_7, state> k_cache_8, state> k_cache_9, tensor position, state> v_cache_0, state> v_cache_1, state> v_cache_10, state> v_cache_11, state> v_cache_12, state> v_cache_13, state> v_cache_14, state> v_cache_15, state> v_cache_16, state> v_cache_17, state> v_cache_18, state> v_cache_19, state> v_cache_2, state> v_cache_20, state> v_cache_21, state> v_cache_22, state> v_cache_23, state> v_cache_24, state> v_cache_25, state> v_cache_26, state> v_cache_27, state> v_cache_3, state> v_cache_4, state> v_cache_5, state> v_cache_6, state> v_cache_7, state> v_cache_8, state> v_cache_9) { int32 var_124_one_hot_vector_size_0 = const()[name = string("op_124_one_hot_vector_size_0"), val = int32(1024)]; int32 var_124_axis_0 = const()[name = string("op_124_axis_0"), val = int32(-1)]; int32 var_124_on_value_0 = const()[name = string("op_124_on_value_0"), val = int32(1)]; int32 var_124_off_value_0 = const()[name = string("op_124_off_value_0"), val = int32(0)]; tensor var_124 = one_hot(axis = var_124_axis_0, indices = position, off_value = var_124_off_value_0, on_value = var_124_on_value_0, one_hot_vector_size = var_124_one_hot_vector_size_0)[name = string("op_124")]; tensor var_134 = const()[name = string("op_134"), val = tensor([1, 1, 1024, 1])]; string var_129_to_fp16_dtype_0 = const()[name = string("op_129_to_fp16_dtype_0"), val = string("fp16")]; tensor var_124_to_fp16 = cast(dtype = var_129_to_fp16_dtype_0, x = var_124)[name = string("cast_3")]; tensor onehot_cast_fp16 = reshape(shape = var_134, x = var_124_to_fp16)[name = string("onehot_cast_fp16")]; int32 var_149 = const()[name = string("op_149"), val = int32(-1)]; string input_embeds_to_fp16_dtype_0 = const()[name = string("input_embeds_to_fp16_dtype_0"), val = string("fp16")]; fp16 var_148_promoted_to_fp16 = const()[name = string("op_148_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor input_embeds_to_fp16 = cast(dtype = input_embeds_to_fp16_dtype_0, x = input_embeds)[name = string("cast_2")]; tensor var_158_cast_fp16 = pow(x = input_embeds_to_fp16, y = var_148_promoted_to_fp16)[name = string("op_158_cast_fp16")]; tensor var_160_axes_0 = const()[name = string("op_160_axes_0"), val = tensor([-1])]; bool var_160_keep_dims_0 = const()[name = string("op_160_keep_dims_0"), val = bool(true)]; tensor var_160_cast_fp16 = reduce_mean(axes = var_160_axes_0, keep_dims = var_160_keep_dims_0, x = var_158_cast_fp16)[name = string("op_160_cast_fp16")]; fp16 var_161_to_fp16 = const()[name = string("op_161_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_162_cast_fp16 = add(x = var_160_cast_fp16, y = var_161_to_fp16)[name = string("op_162_cast_fp16")]; fp32 norm_1_epsilon_0 = const()[name = string("norm_1_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_1_cast_fp16 = rsqrt(epsilon = norm_1_epsilon_0, x = var_162_cast_fp16)[name = string("norm_1_cast_fp16")]; tensor var_164_cast_fp16 = mul(x = input_embeds_to_fp16, y = norm_1_cast_fp16)[name = string("op_164_cast_fp16")]; tensor layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_165_cast_fp16 = mul(x = var_164_cast_fp16, y = layers_0_input_layernorm_weight_to_fp16)[name = string("op_165_cast_fp16")]; tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099392))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099968)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = var_165_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_181 = const()[name = string("op_181"), val = tensor([1, 1, 16, 128])]; tensor var_182_cast_fp16 = reshape(shape = var_181, x = linear_0_cast_fp16)[name = string("op_182_cast_fp16")]; tensor x_5_perm_0 = const()[name = string("x_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2104128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3152768))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3153344)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = var_165_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_186 = const()[name = string("op_186"), val = tensor([1, 1, 8, 128])]; tensor var_187_cast_fp16 = reshape(shape = var_186, x = linear_1_cast_fp16)[name = string("op_187_cast_fp16")]; tensor x_9_perm_0 = const()[name = string("x_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3155456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204096))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = var_165_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_191 = const()[name = string("op_191"), val = tensor([1, 1, 8, 128])]; tensor var_192_cast_fp16 = reshape(shape = var_191, x = linear_2_cast_fp16)[name = string("op_192_cast_fp16")]; tensor v_1_perm_0 = const()[name = string("v_1_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_148_promoted_1_to_fp16 = const()[name = string("op_148_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_5_cast_fp16 = transpose(perm = x_5_perm_0, x = var_182_cast_fp16)[name = string("transpose_111")]; tensor var_196_cast_fp16 = pow(x = x_5_cast_fp16, y = var_148_promoted_1_to_fp16)[name = string("op_196_cast_fp16")]; tensor var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor([-1])]; bool var_198_keep_dims_0 = const()[name = string("op_198_keep_dims_0"), val = bool(true)]; tensor var_198_cast_fp16 = reduce_mean(axes = var_198_axes_0, keep_dims = var_198_keep_dims_0, x = var_196_cast_fp16)[name = string("op_198_cast_fp16")]; fp16 var_199_to_fp16 = const()[name = string("op_199_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_200_cast_fp16 = add(x = var_198_cast_fp16, y = var_199_to_fp16)[name = string("op_200_cast_fp16")]; fp32 norm_3_epsilon_0 = const()[name = string("norm_3_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_3_cast_fp16 = rsqrt(epsilon = norm_3_epsilon_0, x = var_200_cast_fp16)[name = string("norm_3_cast_fp16")]; tensor var_202_cast_fp16 = mul(x = x_5_cast_fp16, y = norm_3_cast_fp16)[name = string("op_202_cast_fp16")]; tensor layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204672)))]; tensor var_203_cast_fp16 = mul(x = var_202_cast_fp16, y = layers_0_self_attn_q_norm_weight_to_fp16)[name = string("op_203_cast_fp16")]; fp16 var_148_promoted_2_to_fp16 = const()[name = string("op_148_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_9_cast_fp16 = transpose(perm = x_9_perm_0, x = var_187_cast_fp16)[name = string("transpose_110")]; tensor var_207_cast_fp16 = pow(x = x_9_cast_fp16, y = var_148_promoted_2_to_fp16)[name = string("op_207_cast_fp16")]; tensor var_209_axes_0 = const()[name = string("op_209_axes_0"), val = tensor([-1])]; bool var_209_keep_dims_0 = const()[name = string("op_209_keep_dims_0"), val = bool(true)]; tensor var_209_cast_fp16 = reduce_mean(axes = var_209_axes_0, keep_dims = var_209_keep_dims_0, x = var_207_cast_fp16)[name = string("op_209_cast_fp16")]; fp16 var_210_to_fp16 = const()[name = string("op_210_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_211_cast_fp16 = add(x = var_209_cast_fp16, y = var_210_to_fp16)[name = string("op_211_cast_fp16")]; fp32 norm_5_epsilon_0 = const()[name = string("norm_5_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_5_cast_fp16 = rsqrt(epsilon = norm_5_epsilon_0, x = var_211_cast_fp16)[name = string("norm_5_cast_fp16")]; tensor var_213_cast_fp16 = mul(x = x_9_cast_fp16, y = norm_5_cast_fp16)[name = string("op_213_cast_fp16")]; tensor layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204992)))]; tensor var_214_cast_fp16 = mul(x = var_213_cast_fp16, y = layers_0_self_attn_k_norm_weight_to_fp16)[name = string("op_214_cast_fp16")]; string var_217_to_fp16_dtype_0 = const()[name = string("op_217_to_fp16_dtype_0"), val = string("fp16")]; tensor layers_0_self_attn_rope_inv_freq_to_fp16 = const()[name = string("layers_0_self_attn_rope_inv_freq_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205312)))]; tensor position_to_fp16 = cast(dtype = var_217_to_fp16_dtype_0, x = position)[name = string("cast_1")]; tensor freqs_1_cast_fp16 = mul(x = position_to_fp16, y = layers_0_self_attn_rope_inv_freq_to_fp16)[name = string("freqs_1_cast_fp16")]; tensor var_219_cast_fp16 = cos(x = freqs_1_cast_fp16)[name = string("op_219_cast_fp16")]; tensor var_220 = const()[name = string("op_220"), val = tensor([1, 1, 1, -1])]; tensor cos_val_1_cast_fp16 = reshape(shape = var_220, x = var_219_cast_fp16)[name = string("cos_val_1_cast_fp16")]; tensor var_222_cast_fp16 = sin(x = freqs_1_cast_fp16)[name = string("op_222_cast_fp16")]; tensor var_223 = const()[name = string("op_223"), val = tensor([1, 1, 1, -1])]; tensor sin_val_1_cast_fp16 = reshape(shape = var_223, x = var_222_cast_fp16)[name = string("sin_val_1_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_203_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_203_cast_fp16)[name = string("x2_1_cast_fp16")]; tensor var_232_cast_fp16 = mul(x = x1_1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_232_cast_fp16")]; tensor var_233_cast_fp16 = mul(x = x2_1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_233_cast_fp16")]; tensor var_234_cast_fp16 = sub(x = var_232_cast_fp16, y = var_233_cast_fp16)[name = string("op_234_cast_fp16")]; tensor var_235_cast_fp16 = mul(x = x2_1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_235_cast_fp16")]; tensor var_236_cast_fp16 = mul(x = x1_1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_236_cast_fp16")]; tensor var_237_cast_fp16 = add(x = var_235_cast_fp16, y = var_236_cast_fp16)[name = string("op_237_cast_fp16")]; bool q_1_interleave_0 = const()[name = string("q_1_interleave_0"), val = bool(false)]; tensor q_1_cast_fp16 = concat(axis = var_149, interleave = q_1_interleave_0, values = (var_234_cast_fp16, var_237_cast_fp16))[name = string("q_1_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_214_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_214_cast_fp16)[name = string("x2_3_cast_fp16")]; tensor var_256_cast_fp16 = mul(x = x1_3_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_256_cast_fp16")]; tensor var_257_cast_fp16 = mul(x = x2_3_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_257_cast_fp16")]; tensor var_258_cast_fp16 = sub(x = var_256_cast_fp16, y = var_257_cast_fp16)[name = string("op_258_cast_fp16")]; tensor var_259_cast_fp16 = mul(x = x2_3_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_259_cast_fp16")]; tensor var_260_cast_fp16 = mul(x = x1_3_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_260_cast_fp16")]; tensor var_261_cast_fp16 = add(x = var_259_cast_fp16, y = var_260_cast_fp16)[name = string("op_261_cast_fp16")]; bool k_1_interleave_0 = const()[name = string("k_1_interleave_0"), val = bool(false)]; tensor k_1_cast_fp16 = concat(axis = var_149, interleave = k_1_interleave_0, values = (var_258_cast_fp16, var_261_cast_fp16))[name = string("k_1_cast_fp16")]; fp16 var_151_promoted_to_fp16 = const()[name = string("op_151_promoted_to_fp16"), val = fp16(0x1p+0)]; tensor var_264_cast_fp16 = sub(x = var_151_promoted_to_fp16, y = onehot_cast_fp16)[name = string("op_264_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache_0)[name = string("read_state_0")]; tensor k_cache_3_cast_fp16 = mul(x = read_state_0, y = var_264_cast_fp16)[name = string("k_cache_3_cast_fp16")]; write_state(data = k_cache_3_cast_fp16, input = k_cache_0)[name = string("coreml_update_state_112_write_state")]; tensor coreml_update_state_112 = read_state(input = k_cache_0)[name = string("coreml_update_state_112")]; tensor var_266_cast_fp16 = mul(x = k_1_cast_fp16, y = onehot_cast_fp16)[name = string("op_266_cast_fp16")]; tensor k_cache_5_cast_fp16 = add(x = coreml_update_state_112, y = var_266_cast_fp16)[name = string("k_cache_5_cast_fp16")]; write_state(data = k_cache_5_cast_fp16, input = k_cache_0)[name = string("coreml_update_state_113_write_state")]; tensor coreml_update_state_113 = read_state(input = k_cache_0)[name = string("coreml_update_state_113")]; tensor read_state_1 = read_state(input = v_cache_0)[name = string("read_state_1")]; tensor v_cache_3_cast_fp16 = mul(x = read_state_1, y = var_264_cast_fp16)[name = string("v_cache_3_cast_fp16")]; write_state(data = v_cache_3_cast_fp16, input = v_cache_0)[name = string("coreml_update_state_114_write_state")]; tensor coreml_update_state_114 = read_state(input = v_cache_0)[name = string("coreml_update_state_114")]; tensor v_1_cast_fp16 = transpose(perm = v_1_perm_0, x = var_192_cast_fp16)[name = string("transpose_109")]; tensor var_270_cast_fp16 = mul(x = v_1_cast_fp16, y = onehot_cast_fp16)[name = string("op_270_cast_fp16")]; tensor v_cache_5_cast_fp16 = add(x = coreml_update_state_114, y = var_270_cast_fp16)[name = string("v_cache_5_cast_fp16")]; write_state(data = v_cache_5_cast_fp16, input = v_cache_0)[name = string("coreml_update_state_115_write_state")]; tensor coreml_update_state_115 = read_state(input = v_cache_0)[name = string("coreml_update_state_115")]; tensor var_272_axes_0 = const()[name = string("op_272_axes_0"), val = tensor([2])]; tensor var_272_cast_fp16 = expand_dims(axes = var_272_axes_0, x = coreml_update_state_113)[name = string("op_272_cast_fp16")]; tensor k_exp_1_reps_0 = const()[name = string("k_exp_1_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_1_cast_fp16 = tile(reps = k_exp_1_reps_0, x = var_272_cast_fp16)[name = string("k_exp_1_cast_fp16")]; tensor var_275 = const()[name = string("op_275"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_3_cast_fp16 = reshape(shape = var_275, x = k_exp_1_cast_fp16)[name = string("k_exp_3_cast_fp16")]; tensor var_277_axes_0 = const()[name = string("op_277_axes_0"), val = tensor([2])]; tensor var_277_cast_fp16 = expand_dims(axes = var_277_axes_0, x = coreml_update_state_115)[name = string("op_277_cast_fp16")]; tensor v_exp_1_reps_0 = const()[name = string("v_exp_1_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_1_cast_fp16 = tile(reps = v_exp_1_reps_0, x = var_277_cast_fp16)[name = string("v_exp_1_cast_fp16")]; tensor var_280 = const()[name = string("op_280"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_3_cast_fp16 = reshape(shape = var_280, x = v_exp_1_cast_fp16)[name = string("v_exp_3_cast_fp16")]; bool var_283_transpose_x_1 = const()[name = string("op_283_transpose_x_1"), val = bool(false)]; bool var_283_transpose_y_1 = const()[name = string("op_283_transpose_y_1"), val = bool(true)]; tensor var_283_cast_fp16 = matmul(transpose_x = var_283_transpose_x_1, transpose_y = var_283_transpose_y_1, x = q_1_cast_fp16, y = k_exp_3_cast_fp16)[name = string("op_283_cast_fp16")]; fp16 var_284_to_fp16 = const()[name = string("op_284_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_1_cast_fp16 = mul(x = var_283_cast_fp16, y = var_284_to_fp16)[name = string("attn_1_cast_fp16")]; string attention_mask_to_fp16_dtype_0 = const()[name = string("attention_mask_to_fp16_dtype_0"), val = string("fp16")]; tensor attention_mask_to_fp16 = cast(dtype = attention_mask_to_fp16_dtype_0, x = attention_mask)[name = string("cast_0")]; tensor input_1_cast_fp16 = add(x = attn_1_cast_fp16, y = attention_mask_to_fp16)[name = string("input_1_cast_fp16")]; tensor attn_3_cast_fp16 = softmax(axis = var_149, x = input_1_cast_fp16)[name = string("attn_3_cast_fp16")]; bool out_1_transpose_x_0 = const()[name = string("out_1_transpose_x_0"), val = bool(false)]; bool out_1_transpose_y_0 = const()[name = string("out_1_transpose_y_0"), val = bool(false)]; tensor out_1_cast_fp16 = matmul(transpose_x = out_1_transpose_x_0, transpose_y = out_1_transpose_y_0, x = attn_3_cast_fp16, y = v_exp_3_cast_fp16)[name = string("out_1_cast_fp16")]; tensor var_289_perm_0 = const()[name = string("op_289_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_290 = const()[name = string("op_290"), val = tensor([1, 1, -1])]; tensor var_289_cast_fp16 = transpose(perm = var_289_perm_0, x = out_1_cast_fp16)[name = string("transpose_108")]; tensor input_3_cast_fp16 = reshape(shape = var_290, x = var_289_cast_fp16)[name = string("input_3_cast_fp16")]; tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6302720))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_19_cast_fp16 = add(x = input_embeds_to_fp16, y = linear_3_cast_fp16)[name = string("x_19_cast_fp16")]; fp16 var_148_promoted_3_to_fp16 = const()[name = string("op_148_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_297_cast_fp16 = pow(x = x_19_cast_fp16, y = var_148_promoted_3_to_fp16)[name = string("op_297_cast_fp16")]; tensor var_299_axes_0 = const()[name = string("op_299_axes_0"), val = tensor([-1])]; bool var_299_keep_dims_0 = const()[name = string("op_299_keep_dims_0"), val = bool(true)]; tensor var_299_cast_fp16 = reduce_mean(axes = var_299_axes_0, keep_dims = var_299_keep_dims_0, x = var_297_cast_fp16)[name = string("op_299_cast_fp16")]; fp16 var_300_to_fp16 = const()[name = string("op_300_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_301_cast_fp16 = add(x = var_299_cast_fp16, y = var_300_to_fp16)[name = string("op_301_cast_fp16")]; fp32 norm_7_epsilon_0 = const()[name = string("norm_7_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_7_cast_fp16 = rsqrt(epsilon = norm_7_epsilon_0, x = var_301_cast_fp16)[name = string("norm_7_cast_fp16")]; tensor var_303_cast_fp16 = mul(x = x_19_cast_fp16, y = norm_7_cast_fp16)[name = string("op_303_cast_fp16")]; tensor layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6303296)))]; tensor var_304_cast_fp16 = mul(x = var_303_cast_fp16, y = layers_0_post_attention_layernorm_weight_to_fp16)[name = string("op_304_cast_fp16")]; tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6305408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451200))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451776)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = var_304_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_314_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_314_cast_fp16")]; tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9457984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12603776))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = var_304_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_9_cast_fp16 = mul(x = var_314_cast_fp16, y = linear_5_cast_fp16)[name = string("input_9_cast_fp16")]; tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12604352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750144))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_6_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_334 = const()[name = string("op_334"), val = int32(-1)]; fp16 var_333_promoted_to_fp16 = const()[name = string("op_333_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_343_cast_fp16 = pow(x = x_25_cast_fp16, y = var_333_promoted_to_fp16)[name = string("op_343_cast_fp16")]; tensor var_345_axes_0 = const()[name = string("op_345_axes_0"), val = tensor([-1])]; bool var_345_keep_dims_0 = const()[name = string("op_345_keep_dims_0"), val = bool(true)]; tensor var_345_cast_fp16 = reduce_mean(axes = var_345_axes_0, keep_dims = var_345_keep_dims_0, x = var_343_cast_fp16)[name = string("op_345_cast_fp16")]; fp16 var_346_to_fp16 = const()[name = string("op_346_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_347_cast_fp16 = add(x = var_345_cast_fp16, y = var_346_to_fp16)[name = string("op_347_cast_fp16")]; fp32 norm_9_epsilon_0 = const()[name = string("norm_9_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_9_cast_fp16 = rsqrt(epsilon = norm_9_epsilon_0, x = var_347_cast_fp16)[name = string("norm_9_cast_fp16")]; tensor var_349_cast_fp16 = mul(x = x_25_cast_fp16, y = norm_9_cast_fp16)[name = string("op_349_cast_fp16")]; tensor layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750720)))]; tensor var_350_cast_fp16 = mul(x = var_349_cast_fp16, y = layers_1_input_layernorm_weight_to_fp16)[name = string("op_350_cast_fp16")]; tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850048))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = var_350_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor var_366 = const()[name = string("op_366"), val = tensor([1, 1, 16, 128])]; tensor var_367_cast_fp16 = reshape(shape = var_366, x = linear_7_cast_fp16)[name = string("op_367_cast_fp16")]; tensor x_31_perm_0 = const()[name = string("x_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899264))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = var_350_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_371 = const()[name = string("op_371"), val = tensor([1, 1, 8, 128])]; tensor var_372_cast_fp16 = reshape(shape = var_371, x = linear_8_cast_fp16)[name = string("op_372_cast_fp16")]; tensor x_35_perm_0 = const()[name = string("x_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19948480))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = var_350_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_376 = const()[name = string("op_376"), val = tensor([1, 1, 8, 128])]; tensor var_377_cast_fp16 = reshape(shape = var_376, x = linear_9_cast_fp16)[name = string("op_377_cast_fp16")]; tensor v_3_perm_0 = const()[name = string("v_3_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_333_promoted_1_to_fp16 = const()[name = string("op_333_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_31_cast_fp16 = transpose(perm = x_31_perm_0, x = var_367_cast_fp16)[name = string("transpose_107")]; tensor var_381_cast_fp16 = pow(x = x_31_cast_fp16, y = var_333_promoted_1_to_fp16)[name = string("op_381_cast_fp16")]; tensor var_383_axes_0 = const()[name = string("op_383_axes_0"), val = tensor([-1])]; bool var_383_keep_dims_0 = const()[name = string("op_383_keep_dims_0"), val = bool(true)]; tensor var_383_cast_fp16 = reduce_mean(axes = var_383_axes_0, keep_dims = var_383_keep_dims_0, x = var_381_cast_fp16)[name = string("op_383_cast_fp16")]; fp16 var_384_to_fp16 = const()[name = string("op_384_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_385_cast_fp16 = add(x = var_383_cast_fp16, y = var_384_to_fp16)[name = string("op_385_cast_fp16")]; fp32 norm_11_epsilon_0 = const()[name = string("norm_11_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_11_cast_fp16 = rsqrt(epsilon = norm_11_epsilon_0, x = var_385_cast_fp16)[name = string("norm_11_cast_fp16")]; tensor var_387_cast_fp16 = mul(x = x_31_cast_fp16, y = norm_11_cast_fp16)[name = string("op_387_cast_fp16")]; tensor layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949056)))]; tensor var_388_cast_fp16 = mul(x = var_387_cast_fp16, y = layers_1_self_attn_q_norm_weight_to_fp16)[name = string("op_388_cast_fp16")]; fp16 var_333_promoted_2_to_fp16 = const()[name = string("op_333_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_35_cast_fp16 = transpose(perm = x_35_perm_0, x = var_372_cast_fp16)[name = string("transpose_106")]; tensor var_392_cast_fp16 = pow(x = x_35_cast_fp16, y = var_333_promoted_2_to_fp16)[name = string("op_392_cast_fp16")]; tensor var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor([-1])]; bool var_394_keep_dims_0 = const()[name = string("op_394_keep_dims_0"), val = bool(true)]; tensor var_394_cast_fp16 = reduce_mean(axes = var_394_axes_0, keep_dims = var_394_keep_dims_0, x = var_392_cast_fp16)[name = string("op_394_cast_fp16")]; fp16 var_395_to_fp16 = const()[name = string("op_395_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_396_cast_fp16 = add(x = var_394_cast_fp16, y = var_395_to_fp16)[name = string("op_396_cast_fp16")]; fp32 norm_13_epsilon_0 = const()[name = string("norm_13_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_13_cast_fp16 = rsqrt(epsilon = norm_13_epsilon_0, x = var_396_cast_fp16)[name = string("norm_13_cast_fp16")]; tensor var_398_cast_fp16 = mul(x = x_35_cast_fp16, y = norm_13_cast_fp16)[name = string("op_398_cast_fp16")]; tensor layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949376)))]; tensor var_399_cast_fp16 = mul(x = var_398_cast_fp16, y = layers_1_self_attn_k_norm_weight_to_fp16)[name = string("op_399_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_388_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_388_cast_fp16)[name = string("x2_5_cast_fp16")]; tensor var_417_cast_fp16 = mul(x = x1_5_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_417_cast_fp16")]; tensor var_418_cast_fp16 = mul(x = x2_5_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_418_cast_fp16")]; tensor var_419_cast_fp16 = sub(x = var_417_cast_fp16, y = var_418_cast_fp16)[name = string("op_419_cast_fp16")]; tensor var_420_cast_fp16 = mul(x = x2_5_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_420_cast_fp16")]; tensor var_421_cast_fp16 = mul(x = x1_5_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_421_cast_fp16")]; tensor var_422_cast_fp16 = add(x = var_420_cast_fp16, y = var_421_cast_fp16)[name = string("op_422_cast_fp16")]; bool q_3_interleave_0 = const()[name = string("q_3_interleave_0"), val = bool(false)]; tensor q_3_cast_fp16 = concat(axis = var_334, interleave = q_3_interleave_0, values = (var_419_cast_fp16, var_422_cast_fp16))[name = string("q_3_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_399_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_399_cast_fp16)[name = string("x2_7_cast_fp16")]; tensor var_441_cast_fp16 = mul(x = x1_7_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_441_cast_fp16")]; tensor var_442_cast_fp16 = mul(x = x2_7_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_442_cast_fp16")]; tensor var_443_cast_fp16 = sub(x = var_441_cast_fp16, y = var_442_cast_fp16)[name = string("op_443_cast_fp16")]; tensor var_444_cast_fp16 = mul(x = x2_7_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_444_cast_fp16")]; tensor var_445_cast_fp16 = mul(x = x1_7_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_445_cast_fp16")]; tensor var_446_cast_fp16 = add(x = var_444_cast_fp16, y = var_445_cast_fp16)[name = string("op_446_cast_fp16")]; bool k_3_interleave_0 = const()[name = string("k_3_interleave_0"), val = bool(false)]; tensor k_3_cast_fp16 = concat(axis = var_334, interleave = k_3_interleave_0, values = (var_443_cast_fp16, var_446_cast_fp16))[name = string("k_3_cast_fp16")]; tensor read_state_2 = read_state(input = k_cache_1)[name = string("read_state_2")]; tensor k_cache_9_cast_fp16 = mul(x = read_state_2, y = var_264_cast_fp16)[name = string("k_cache_9_cast_fp16")]; write_state(data = k_cache_9_cast_fp16, input = k_cache_1)[name = string("coreml_update_state_116_write_state")]; tensor coreml_update_state_116 = read_state(input = k_cache_1)[name = string("coreml_update_state_116")]; tensor var_451_cast_fp16 = mul(x = k_3_cast_fp16, y = onehot_cast_fp16)[name = string("op_451_cast_fp16")]; tensor k_cache_11_cast_fp16 = add(x = coreml_update_state_116, y = var_451_cast_fp16)[name = string("k_cache_11_cast_fp16")]; write_state(data = k_cache_11_cast_fp16, input = k_cache_1)[name = string("coreml_update_state_117_write_state")]; tensor coreml_update_state_117 = read_state(input = k_cache_1)[name = string("coreml_update_state_117")]; tensor read_state_3 = read_state(input = v_cache_1)[name = string("read_state_3")]; tensor v_cache_9_cast_fp16 = mul(x = read_state_3, y = var_264_cast_fp16)[name = string("v_cache_9_cast_fp16")]; write_state(data = v_cache_9_cast_fp16, input = v_cache_1)[name = string("coreml_update_state_118_write_state")]; tensor coreml_update_state_118 = read_state(input = v_cache_1)[name = string("coreml_update_state_118")]; tensor v_3_cast_fp16 = transpose(perm = v_3_perm_0, x = var_377_cast_fp16)[name = string("transpose_105")]; tensor var_455_cast_fp16 = mul(x = v_3_cast_fp16, y = onehot_cast_fp16)[name = string("op_455_cast_fp16")]; tensor v_cache_11_cast_fp16 = add(x = coreml_update_state_118, y = var_455_cast_fp16)[name = string("v_cache_11_cast_fp16")]; write_state(data = v_cache_11_cast_fp16, input = v_cache_1)[name = string("coreml_update_state_119_write_state")]; tensor coreml_update_state_119 = read_state(input = v_cache_1)[name = string("coreml_update_state_119")]; tensor var_457_axes_0 = const()[name = string("op_457_axes_0"), val = tensor([2])]; tensor var_457_cast_fp16 = expand_dims(axes = var_457_axes_0, x = coreml_update_state_117)[name = string("op_457_cast_fp16")]; tensor k_exp_5_reps_0 = const()[name = string("k_exp_5_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_5_cast_fp16 = tile(reps = k_exp_5_reps_0, x = var_457_cast_fp16)[name = string("k_exp_5_cast_fp16")]; tensor var_460 = const()[name = string("op_460"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_7_cast_fp16 = reshape(shape = var_460, x = k_exp_5_cast_fp16)[name = string("k_exp_7_cast_fp16")]; tensor var_462_axes_0 = const()[name = string("op_462_axes_0"), val = tensor([2])]; tensor var_462_cast_fp16 = expand_dims(axes = var_462_axes_0, x = coreml_update_state_119)[name = string("op_462_cast_fp16")]; tensor v_exp_5_reps_0 = const()[name = string("v_exp_5_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_5_cast_fp16 = tile(reps = v_exp_5_reps_0, x = var_462_cast_fp16)[name = string("v_exp_5_cast_fp16")]; tensor var_465 = const()[name = string("op_465"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_7_cast_fp16 = reshape(shape = var_465, x = v_exp_5_cast_fp16)[name = string("v_exp_7_cast_fp16")]; bool var_468_transpose_x_1 = const()[name = string("op_468_transpose_x_1"), val = bool(false)]; bool var_468_transpose_y_1 = const()[name = string("op_468_transpose_y_1"), val = bool(true)]; tensor var_468_cast_fp16 = matmul(transpose_x = var_468_transpose_x_1, transpose_y = var_468_transpose_y_1, x = q_3_cast_fp16, y = k_exp_7_cast_fp16)[name = string("op_468_cast_fp16")]; fp16 var_469_to_fp16 = const()[name = string("op_469_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_5_cast_fp16 = mul(x = var_468_cast_fp16, y = var_469_to_fp16)[name = string("attn_5_cast_fp16")]; tensor input_11_cast_fp16 = add(x = attn_5_cast_fp16, y = attention_mask_to_fp16)[name = string("input_11_cast_fp16")]; tensor attn_7_cast_fp16 = softmax(axis = var_334, x = input_11_cast_fp16)[name = string("attn_7_cast_fp16")]; bool out_3_transpose_x_0 = const()[name = string("out_3_transpose_x_0"), val = bool(false)]; bool out_3_transpose_y_0 = const()[name = string("out_3_transpose_y_0"), val = bool(false)]; tensor out_3_cast_fp16 = matmul(transpose_x = out_3_transpose_x_0, transpose_y = out_3_transpose_y_0, x = attn_7_cast_fp16, y = v_exp_7_cast_fp16)[name = string("out_3_cast_fp16")]; tensor var_474_perm_0 = const()[name = string("op_474_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_475 = const()[name = string("op_475"), val = tensor([1, 1, -1])]; tensor var_474_cast_fp16 = transpose(perm = var_474_perm_0, x = out_3_cast_fp16)[name = string("transpose_104")]; tensor input_13_cast_fp16 = reshape(shape = var_475, x = var_474_cast_fp16)[name = string("input_13_cast_fp16")]; tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22046912))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor x_45_cast_fp16 = add(x = x_25_cast_fp16, y = linear_10_cast_fp16)[name = string("x_45_cast_fp16")]; fp16 var_333_promoted_3_to_fp16 = const()[name = string("op_333_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_482_cast_fp16 = pow(x = x_45_cast_fp16, y = var_333_promoted_3_to_fp16)[name = string("op_482_cast_fp16")]; tensor var_484_axes_0 = const()[name = string("op_484_axes_0"), val = tensor([-1])]; bool var_484_keep_dims_0 = const()[name = string("op_484_keep_dims_0"), val = bool(true)]; tensor var_484_cast_fp16 = reduce_mean(axes = var_484_axes_0, keep_dims = var_484_keep_dims_0, x = var_482_cast_fp16)[name = string("op_484_cast_fp16")]; fp16 var_485_to_fp16 = const()[name = string("op_485_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_486_cast_fp16 = add(x = var_484_cast_fp16, y = var_485_to_fp16)[name = string("op_486_cast_fp16")]; fp32 norm_15_epsilon_0 = const()[name = string("norm_15_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_15_cast_fp16 = rsqrt(epsilon = norm_15_epsilon_0, x = var_486_cast_fp16)[name = string("norm_15_cast_fp16")]; tensor var_488_cast_fp16 = mul(x = x_45_cast_fp16, y = norm_15_cast_fp16)[name = string("op_488_cast_fp16")]; tensor layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22047488)))]; tensor var_489_cast_fp16 = mul(x = var_488_cast_fp16, y = layers_1_post_attention_layernorm_weight_to_fp16)[name = string("op_489_cast_fp16")]; tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22049600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195392))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = var_489_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_499_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_499_cast_fp16")]; tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28341760))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = var_489_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_19_cast_fp16 = mul(x = var_499_cast_fp16, y = linear_12_cast_fp16)[name = string("input_19_cast_fp16")]; tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28342336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488128))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_13_cast_fp16)[name = string("x_51_cast_fp16")]; int32 var_519 = const()[name = string("op_519"), val = int32(-1)]; fp16 var_518_promoted_to_fp16 = const()[name = string("op_518_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_528_cast_fp16 = pow(x = x_51_cast_fp16, y = var_518_promoted_to_fp16)[name = string("op_528_cast_fp16")]; tensor var_530_axes_0 = const()[name = string("op_530_axes_0"), val = tensor([-1])]; bool var_530_keep_dims_0 = const()[name = string("op_530_keep_dims_0"), val = bool(true)]; tensor var_530_cast_fp16 = reduce_mean(axes = var_530_axes_0, keep_dims = var_530_keep_dims_0, x = var_528_cast_fp16)[name = string("op_530_cast_fp16")]; fp16 var_531_to_fp16 = const()[name = string("op_531_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_532_cast_fp16 = add(x = var_530_cast_fp16, y = var_531_to_fp16)[name = string("op_532_cast_fp16")]; fp32 norm_17_epsilon_0 = const()[name = string("norm_17_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_17_cast_fp16 = rsqrt(epsilon = norm_17_epsilon_0, x = var_532_cast_fp16)[name = string("norm_17_cast_fp16")]; tensor var_534_cast_fp16 = mul(x = x_51_cast_fp16, y = norm_17_cast_fp16)[name = string("op_534_cast_fp16")]; tensor layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488704)))]; tensor var_535_cast_fp16 = mul(x = var_534_cast_fp16, y = layers_2_input_layernorm_weight_to_fp16)[name = string("op_535_cast_fp16")]; tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588032))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = var_535_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor var_551 = const()[name = string("op_551"), val = tensor([1, 1, 16, 128])]; tensor var_552_cast_fp16 = reshape(shape = var_551, x = linear_14_cast_fp16)[name = string("op_552_cast_fp16")]; tensor x_57_perm_0 = const()[name = string("x_57_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637248))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = var_535_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor var_556 = const()[name = string("op_556"), val = tensor([1, 1, 8, 128])]; tensor var_557_cast_fp16 = reshape(shape = var_556, x = linear_15_cast_fp16)[name = string("op_557_cast_fp16")]; tensor x_61_perm_0 = const()[name = string("x_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35686464))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = var_535_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_561 = const()[name = string("op_561"), val = tensor([1, 1, 8, 128])]; tensor var_562_cast_fp16 = reshape(shape = var_561, x = linear_16_cast_fp16)[name = string("op_562_cast_fp16")]; tensor v_5_perm_0 = const()[name = string("v_5_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_518_promoted_1_to_fp16 = const()[name = string("op_518_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_57_cast_fp16 = transpose(perm = x_57_perm_0, x = var_552_cast_fp16)[name = string("transpose_103")]; tensor var_566_cast_fp16 = pow(x = x_57_cast_fp16, y = var_518_promoted_1_to_fp16)[name = string("op_566_cast_fp16")]; tensor var_568_axes_0 = const()[name = string("op_568_axes_0"), val = tensor([-1])]; bool var_568_keep_dims_0 = const()[name = string("op_568_keep_dims_0"), val = bool(true)]; tensor var_568_cast_fp16 = reduce_mean(axes = var_568_axes_0, keep_dims = var_568_keep_dims_0, x = var_566_cast_fp16)[name = string("op_568_cast_fp16")]; fp16 var_569_to_fp16 = const()[name = string("op_569_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_570_cast_fp16 = add(x = var_568_cast_fp16, y = var_569_to_fp16)[name = string("op_570_cast_fp16")]; fp32 norm_19_epsilon_0 = const()[name = string("norm_19_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_19_cast_fp16 = rsqrt(epsilon = norm_19_epsilon_0, x = var_570_cast_fp16)[name = string("norm_19_cast_fp16")]; tensor var_572_cast_fp16 = mul(x = x_57_cast_fp16, y = norm_19_cast_fp16)[name = string("op_572_cast_fp16")]; tensor layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687040)))]; tensor var_573_cast_fp16 = mul(x = var_572_cast_fp16, y = layers_2_self_attn_q_norm_weight_to_fp16)[name = string("op_573_cast_fp16")]; fp16 var_518_promoted_2_to_fp16 = const()[name = string("op_518_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_61_cast_fp16 = transpose(perm = x_61_perm_0, x = var_557_cast_fp16)[name = string("transpose_102")]; tensor var_577_cast_fp16 = pow(x = x_61_cast_fp16, y = var_518_promoted_2_to_fp16)[name = string("op_577_cast_fp16")]; tensor var_579_axes_0 = const()[name = string("op_579_axes_0"), val = tensor([-1])]; bool var_579_keep_dims_0 = const()[name = string("op_579_keep_dims_0"), val = bool(true)]; tensor var_579_cast_fp16 = reduce_mean(axes = var_579_axes_0, keep_dims = var_579_keep_dims_0, x = var_577_cast_fp16)[name = string("op_579_cast_fp16")]; fp16 var_580_to_fp16 = const()[name = string("op_580_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_581_cast_fp16 = add(x = var_579_cast_fp16, y = var_580_to_fp16)[name = string("op_581_cast_fp16")]; fp32 norm_21_epsilon_0 = const()[name = string("norm_21_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_21_cast_fp16 = rsqrt(epsilon = norm_21_epsilon_0, x = var_581_cast_fp16)[name = string("norm_21_cast_fp16")]; tensor var_583_cast_fp16 = mul(x = x_61_cast_fp16, y = norm_21_cast_fp16)[name = string("op_583_cast_fp16")]; tensor layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687360)))]; tensor var_584_cast_fp16 = mul(x = var_583_cast_fp16, y = layers_2_self_attn_k_norm_weight_to_fp16)[name = string("op_584_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_573_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_573_cast_fp16)[name = string("x2_9_cast_fp16")]; tensor var_602_cast_fp16 = mul(x = x1_9_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_602_cast_fp16")]; tensor var_603_cast_fp16 = mul(x = x2_9_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_603_cast_fp16")]; tensor var_604_cast_fp16 = sub(x = var_602_cast_fp16, y = var_603_cast_fp16)[name = string("op_604_cast_fp16")]; tensor var_605_cast_fp16 = mul(x = x2_9_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_605_cast_fp16")]; tensor var_606_cast_fp16 = mul(x = x1_9_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_606_cast_fp16")]; tensor var_607_cast_fp16 = add(x = var_605_cast_fp16, y = var_606_cast_fp16)[name = string("op_607_cast_fp16")]; bool q_5_interleave_0 = const()[name = string("q_5_interleave_0"), val = bool(false)]; tensor q_5_cast_fp16 = concat(axis = var_519, interleave = q_5_interleave_0, values = (var_604_cast_fp16, var_607_cast_fp16))[name = string("q_5_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_584_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_584_cast_fp16)[name = string("x2_11_cast_fp16")]; tensor var_626_cast_fp16 = mul(x = x1_11_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_626_cast_fp16")]; tensor var_627_cast_fp16 = mul(x = x2_11_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_627_cast_fp16")]; tensor var_628_cast_fp16 = sub(x = var_626_cast_fp16, y = var_627_cast_fp16)[name = string("op_628_cast_fp16")]; tensor var_629_cast_fp16 = mul(x = x2_11_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_629_cast_fp16")]; tensor var_630_cast_fp16 = mul(x = x1_11_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_630_cast_fp16")]; tensor var_631_cast_fp16 = add(x = var_629_cast_fp16, y = var_630_cast_fp16)[name = string("op_631_cast_fp16")]; bool k_5_interleave_0 = const()[name = string("k_5_interleave_0"), val = bool(false)]; tensor k_5_cast_fp16 = concat(axis = var_519, interleave = k_5_interleave_0, values = (var_628_cast_fp16, var_631_cast_fp16))[name = string("k_5_cast_fp16")]; tensor read_state_4 = read_state(input = k_cache_2)[name = string("read_state_4")]; tensor k_cache_15_cast_fp16 = mul(x = read_state_4, y = var_264_cast_fp16)[name = string("k_cache_15_cast_fp16")]; write_state(data = k_cache_15_cast_fp16, input = k_cache_2)[name = string("coreml_update_state_120_write_state")]; tensor coreml_update_state_120 = read_state(input = k_cache_2)[name = string("coreml_update_state_120")]; tensor var_636_cast_fp16 = mul(x = k_5_cast_fp16, y = onehot_cast_fp16)[name = string("op_636_cast_fp16")]; tensor k_cache_17_cast_fp16 = add(x = coreml_update_state_120, y = var_636_cast_fp16)[name = string("k_cache_17_cast_fp16")]; write_state(data = k_cache_17_cast_fp16, input = k_cache_2)[name = string("coreml_update_state_121_write_state")]; tensor coreml_update_state_121 = read_state(input = k_cache_2)[name = string("coreml_update_state_121")]; tensor read_state_5 = read_state(input = v_cache_2)[name = string("read_state_5")]; tensor v_cache_15_cast_fp16 = mul(x = read_state_5, y = var_264_cast_fp16)[name = string("v_cache_15_cast_fp16")]; write_state(data = v_cache_15_cast_fp16, input = v_cache_2)[name = string("coreml_update_state_122_write_state")]; tensor coreml_update_state_122 = read_state(input = v_cache_2)[name = string("coreml_update_state_122")]; tensor v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = var_562_cast_fp16)[name = string("transpose_101")]; tensor var_640_cast_fp16 = mul(x = v_5_cast_fp16, y = onehot_cast_fp16)[name = string("op_640_cast_fp16")]; tensor v_cache_17_cast_fp16 = add(x = coreml_update_state_122, y = var_640_cast_fp16)[name = string("v_cache_17_cast_fp16")]; write_state(data = v_cache_17_cast_fp16, input = v_cache_2)[name = string("coreml_update_state_123_write_state")]; tensor coreml_update_state_123 = read_state(input = v_cache_2)[name = string("coreml_update_state_123")]; tensor var_642_axes_0 = const()[name = string("op_642_axes_0"), val = tensor([2])]; tensor var_642_cast_fp16 = expand_dims(axes = var_642_axes_0, x = coreml_update_state_121)[name = string("op_642_cast_fp16")]; tensor k_exp_9_reps_0 = const()[name = string("k_exp_9_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_9_cast_fp16 = tile(reps = k_exp_9_reps_0, x = var_642_cast_fp16)[name = string("k_exp_9_cast_fp16")]; tensor var_645 = const()[name = string("op_645"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_11_cast_fp16 = reshape(shape = var_645, x = k_exp_9_cast_fp16)[name = string("k_exp_11_cast_fp16")]; tensor var_647_axes_0 = const()[name = string("op_647_axes_0"), val = tensor([2])]; tensor var_647_cast_fp16 = expand_dims(axes = var_647_axes_0, x = coreml_update_state_123)[name = string("op_647_cast_fp16")]; tensor v_exp_9_reps_0 = const()[name = string("v_exp_9_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_9_cast_fp16 = tile(reps = v_exp_9_reps_0, x = var_647_cast_fp16)[name = string("v_exp_9_cast_fp16")]; tensor var_650 = const()[name = string("op_650"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_11_cast_fp16 = reshape(shape = var_650, x = v_exp_9_cast_fp16)[name = string("v_exp_11_cast_fp16")]; bool var_653_transpose_x_1 = const()[name = string("op_653_transpose_x_1"), val = bool(false)]; bool var_653_transpose_y_1 = const()[name = string("op_653_transpose_y_1"), val = bool(true)]; tensor var_653_cast_fp16 = matmul(transpose_x = var_653_transpose_x_1, transpose_y = var_653_transpose_y_1, x = q_5_cast_fp16, y = k_exp_11_cast_fp16)[name = string("op_653_cast_fp16")]; fp16 var_654_to_fp16 = const()[name = string("op_654_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_9_cast_fp16 = mul(x = var_653_cast_fp16, y = var_654_to_fp16)[name = string("attn_9_cast_fp16")]; tensor input_21_cast_fp16 = add(x = attn_9_cast_fp16, y = attention_mask_to_fp16)[name = string("input_21_cast_fp16")]; tensor attn_11_cast_fp16 = softmax(axis = var_519, x = input_21_cast_fp16)[name = string("attn_11_cast_fp16")]; bool out_5_transpose_x_0 = const()[name = string("out_5_transpose_x_0"), val = bool(false)]; bool out_5_transpose_y_0 = const()[name = string("out_5_transpose_y_0"), val = bool(false)]; tensor out_5_cast_fp16 = matmul(transpose_x = out_5_transpose_x_0, transpose_y = out_5_transpose_y_0, x = attn_11_cast_fp16, y = v_exp_11_cast_fp16)[name = string("out_5_cast_fp16")]; tensor var_659_perm_0 = const()[name = string("op_659_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_660 = const()[name = string("op_660"), val = tensor([1, 1, -1])]; tensor var_659_cast_fp16 = transpose(perm = var_659_perm_0, x = out_5_cast_fp16)[name = string("transpose_100")]; tensor input_23_cast_fp16 = reshape(shape = var_660, x = var_659_cast_fp16)[name = string("input_23_cast_fp16")]; tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37784896))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_51_cast_fp16, y = linear_17_cast_fp16)[name = string("x_71_cast_fp16")]; fp16 var_518_promoted_3_to_fp16 = const()[name = string("op_518_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_667_cast_fp16 = pow(x = x_71_cast_fp16, y = var_518_promoted_3_to_fp16)[name = string("op_667_cast_fp16")]; tensor var_669_axes_0 = const()[name = string("op_669_axes_0"), val = tensor([-1])]; bool var_669_keep_dims_0 = const()[name = string("op_669_keep_dims_0"), val = bool(true)]; tensor var_669_cast_fp16 = reduce_mean(axes = var_669_axes_0, keep_dims = var_669_keep_dims_0, x = var_667_cast_fp16)[name = string("op_669_cast_fp16")]; fp16 var_670_to_fp16 = const()[name = string("op_670_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_671_cast_fp16 = add(x = var_669_cast_fp16, y = var_670_to_fp16)[name = string("op_671_cast_fp16")]; fp32 norm_23_epsilon_0 = const()[name = string("norm_23_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_23_cast_fp16 = rsqrt(epsilon = norm_23_epsilon_0, x = var_671_cast_fp16)[name = string("norm_23_cast_fp16")]; tensor var_673_cast_fp16 = mul(x = x_71_cast_fp16, y = norm_23_cast_fp16)[name = string("op_673_cast_fp16")]; tensor layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37785472)))]; tensor var_674_cast_fp16 = mul(x = var_673_cast_fp16, y = layers_2_post_attention_layernorm_weight_to_fp16)[name = string("op_674_cast_fp16")]; tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37787584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933376))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = var_674_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_684_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_684_cast_fp16")]; tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44079744))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = var_674_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_29_cast_fp16 = mul(x = var_684_cast_fp16, y = linear_19_cast_fp16)[name = string("input_29_cast_fp16")]; tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44080320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226112))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor x_77_cast_fp16 = add(x = x_71_cast_fp16, y = linear_20_cast_fp16)[name = string("x_77_cast_fp16")]; int32 var_704 = const()[name = string("op_704"), val = int32(-1)]; fp16 var_703_promoted_to_fp16 = const()[name = string("op_703_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_713_cast_fp16 = pow(x = x_77_cast_fp16, y = var_703_promoted_to_fp16)[name = string("op_713_cast_fp16")]; tensor var_715_axes_0 = const()[name = string("op_715_axes_0"), val = tensor([-1])]; bool var_715_keep_dims_0 = const()[name = string("op_715_keep_dims_0"), val = bool(true)]; tensor var_715_cast_fp16 = reduce_mean(axes = var_715_axes_0, keep_dims = var_715_keep_dims_0, x = var_713_cast_fp16)[name = string("op_715_cast_fp16")]; fp16 var_716_to_fp16 = const()[name = string("op_716_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_717_cast_fp16 = add(x = var_715_cast_fp16, y = var_716_to_fp16)[name = string("op_717_cast_fp16")]; fp32 norm_25_epsilon_0 = const()[name = string("norm_25_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_25_cast_fp16 = rsqrt(epsilon = norm_25_epsilon_0, x = var_717_cast_fp16)[name = string("norm_25_cast_fp16")]; tensor var_719_cast_fp16 = mul(x = x_77_cast_fp16, y = norm_25_cast_fp16)[name = string("op_719_cast_fp16")]; tensor layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226688)))]; tensor var_720_cast_fp16 = mul(x = var_719_cast_fp16, y = layers_3_input_layernorm_weight_to_fp16)[name = string("op_720_cast_fp16")]; tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326016))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = var_720_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor var_736 = const()[name = string("op_736"), val = tensor([1, 1, 16, 128])]; tensor var_737_cast_fp16 = reshape(shape = var_736, x = linear_21_cast_fp16)[name = string("op_737_cast_fp16")]; tensor x_83_perm_0 = const()[name = string("x_83_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375232))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = var_720_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor var_741 = const()[name = string("op_741"), val = tensor([1, 1, 8, 128])]; tensor var_742_cast_fp16 = reshape(shape = var_741, x = linear_22_cast_fp16)[name = string("op_742_cast_fp16")]; tensor x_87_perm_0 = const()[name = string("x_87_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51424448))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = var_720_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor var_746 = const()[name = string("op_746"), val = tensor([1, 1, 8, 128])]; tensor var_747_cast_fp16 = reshape(shape = var_746, x = linear_23_cast_fp16)[name = string("op_747_cast_fp16")]; tensor v_7_perm_0 = const()[name = string("v_7_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_703_promoted_1_to_fp16 = const()[name = string("op_703_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_83_cast_fp16 = transpose(perm = x_83_perm_0, x = var_737_cast_fp16)[name = string("transpose_99")]; tensor var_751_cast_fp16 = pow(x = x_83_cast_fp16, y = var_703_promoted_1_to_fp16)[name = string("op_751_cast_fp16")]; tensor var_753_axes_0 = const()[name = string("op_753_axes_0"), val = tensor([-1])]; bool var_753_keep_dims_0 = const()[name = string("op_753_keep_dims_0"), val = bool(true)]; tensor var_753_cast_fp16 = reduce_mean(axes = var_753_axes_0, keep_dims = var_753_keep_dims_0, x = var_751_cast_fp16)[name = string("op_753_cast_fp16")]; fp16 var_754_to_fp16 = const()[name = string("op_754_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_755_cast_fp16 = add(x = var_753_cast_fp16, y = var_754_to_fp16)[name = string("op_755_cast_fp16")]; fp32 norm_27_epsilon_0 = const()[name = string("norm_27_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_27_cast_fp16 = rsqrt(epsilon = norm_27_epsilon_0, x = var_755_cast_fp16)[name = string("norm_27_cast_fp16")]; tensor var_757_cast_fp16 = mul(x = x_83_cast_fp16, y = norm_27_cast_fp16)[name = string("op_757_cast_fp16")]; tensor layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425024)))]; tensor var_758_cast_fp16 = mul(x = var_757_cast_fp16, y = layers_3_self_attn_q_norm_weight_to_fp16)[name = string("op_758_cast_fp16")]; fp16 var_703_promoted_2_to_fp16 = const()[name = string("op_703_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_87_cast_fp16 = transpose(perm = x_87_perm_0, x = var_742_cast_fp16)[name = string("transpose_98")]; tensor var_762_cast_fp16 = pow(x = x_87_cast_fp16, y = var_703_promoted_2_to_fp16)[name = string("op_762_cast_fp16")]; tensor var_764_axes_0 = const()[name = string("op_764_axes_0"), val = tensor([-1])]; bool var_764_keep_dims_0 = const()[name = string("op_764_keep_dims_0"), val = bool(true)]; tensor var_764_cast_fp16 = reduce_mean(axes = var_764_axes_0, keep_dims = var_764_keep_dims_0, x = var_762_cast_fp16)[name = string("op_764_cast_fp16")]; fp16 var_765_to_fp16 = const()[name = string("op_765_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_766_cast_fp16 = add(x = var_764_cast_fp16, y = var_765_to_fp16)[name = string("op_766_cast_fp16")]; fp32 norm_29_epsilon_0 = const()[name = string("norm_29_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_29_cast_fp16 = rsqrt(epsilon = norm_29_epsilon_0, x = var_766_cast_fp16)[name = string("norm_29_cast_fp16")]; tensor var_768_cast_fp16 = mul(x = x_87_cast_fp16, y = norm_29_cast_fp16)[name = string("op_768_cast_fp16")]; tensor layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425344)))]; tensor var_769_cast_fp16 = mul(x = var_768_cast_fp16, y = layers_3_self_attn_k_norm_weight_to_fp16)[name = string("op_769_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_758_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_758_cast_fp16)[name = string("x2_13_cast_fp16")]; tensor var_787_cast_fp16 = mul(x = x1_13_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_787_cast_fp16")]; tensor var_788_cast_fp16 = mul(x = x2_13_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_788_cast_fp16")]; tensor var_789_cast_fp16 = sub(x = var_787_cast_fp16, y = var_788_cast_fp16)[name = string("op_789_cast_fp16")]; tensor var_790_cast_fp16 = mul(x = x2_13_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_790_cast_fp16")]; tensor var_791_cast_fp16 = mul(x = x1_13_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_791_cast_fp16")]; tensor var_792_cast_fp16 = add(x = var_790_cast_fp16, y = var_791_cast_fp16)[name = string("op_792_cast_fp16")]; bool q_7_interleave_0 = const()[name = string("q_7_interleave_0"), val = bool(false)]; tensor q_7_cast_fp16 = concat(axis = var_704, interleave = q_7_interleave_0, values = (var_789_cast_fp16, var_792_cast_fp16))[name = string("q_7_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_769_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_769_cast_fp16)[name = string("x2_15_cast_fp16")]; tensor var_811_cast_fp16 = mul(x = x1_15_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_811_cast_fp16")]; tensor var_812_cast_fp16 = mul(x = x2_15_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_812_cast_fp16")]; tensor var_813_cast_fp16 = sub(x = var_811_cast_fp16, y = var_812_cast_fp16)[name = string("op_813_cast_fp16")]; tensor var_814_cast_fp16 = mul(x = x2_15_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_814_cast_fp16")]; tensor var_815_cast_fp16 = mul(x = x1_15_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_815_cast_fp16")]; tensor var_816_cast_fp16 = add(x = var_814_cast_fp16, y = var_815_cast_fp16)[name = string("op_816_cast_fp16")]; bool k_7_interleave_0 = const()[name = string("k_7_interleave_0"), val = bool(false)]; tensor k_7_cast_fp16 = concat(axis = var_704, interleave = k_7_interleave_0, values = (var_813_cast_fp16, var_816_cast_fp16))[name = string("k_7_cast_fp16")]; tensor read_state_6 = read_state(input = k_cache_3)[name = string("read_state_6")]; tensor k_cache_21_cast_fp16 = mul(x = read_state_6, y = var_264_cast_fp16)[name = string("k_cache_21_cast_fp16")]; write_state(data = k_cache_21_cast_fp16, input = k_cache_3)[name = string("coreml_update_state_124_write_state")]; tensor coreml_update_state_124 = read_state(input = k_cache_3)[name = string("coreml_update_state_124")]; tensor var_821_cast_fp16 = mul(x = k_7_cast_fp16, y = onehot_cast_fp16)[name = string("op_821_cast_fp16")]; tensor k_cache_23_cast_fp16 = add(x = coreml_update_state_124, y = var_821_cast_fp16)[name = string("k_cache_23_cast_fp16")]; write_state(data = k_cache_23_cast_fp16, input = k_cache_3)[name = string("coreml_update_state_125_write_state")]; tensor coreml_update_state_125 = read_state(input = k_cache_3)[name = string("coreml_update_state_125")]; tensor read_state_7 = read_state(input = v_cache_3)[name = string("read_state_7")]; tensor v_cache_21_cast_fp16 = mul(x = read_state_7, y = var_264_cast_fp16)[name = string("v_cache_21_cast_fp16")]; write_state(data = v_cache_21_cast_fp16, input = v_cache_3)[name = string("coreml_update_state_126_write_state")]; tensor coreml_update_state_126 = read_state(input = v_cache_3)[name = string("coreml_update_state_126")]; tensor v_7_cast_fp16 = transpose(perm = v_7_perm_0, x = var_747_cast_fp16)[name = string("transpose_97")]; tensor var_825_cast_fp16 = mul(x = v_7_cast_fp16, y = onehot_cast_fp16)[name = string("op_825_cast_fp16")]; tensor v_cache_23_cast_fp16 = add(x = coreml_update_state_126, y = var_825_cast_fp16)[name = string("v_cache_23_cast_fp16")]; write_state(data = v_cache_23_cast_fp16, input = v_cache_3)[name = string("coreml_update_state_127_write_state")]; tensor coreml_update_state_127 = read_state(input = v_cache_3)[name = string("coreml_update_state_127")]; tensor var_827_axes_0 = const()[name = string("op_827_axes_0"), val = tensor([2])]; tensor var_827_cast_fp16 = expand_dims(axes = var_827_axes_0, x = coreml_update_state_125)[name = string("op_827_cast_fp16")]; tensor k_exp_13_reps_0 = const()[name = string("k_exp_13_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_13_cast_fp16 = tile(reps = k_exp_13_reps_0, x = var_827_cast_fp16)[name = string("k_exp_13_cast_fp16")]; tensor var_830 = const()[name = string("op_830"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_15_cast_fp16 = reshape(shape = var_830, x = k_exp_13_cast_fp16)[name = string("k_exp_15_cast_fp16")]; tensor var_832_axes_0 = const()[name = string("op_832_axes_0"), val = tensor([2])]; tensor var_832_cast_fp16 = expand_dims(axes = var_832_axes_0, x = coreml_update_state_127)[name = string("op_832_cast_fp16")]; tensor v_exp_13_reps_0 = const()[name = string("v_exp_13_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_13_cast_fp16 = tile(reps = v_exp_13_reps_0, x = var_832_cast_fp16)[name = string("v_exp_13_cast_fp16")]; tensor var_835 = const()[name = string("op_835"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_15_cast_fp16 = reshape(shape = var_835, x = v_exp_13_cast_fp16)[name = string("v_exp_15_cast_fp16")]; bool var_838_transpose_x_1 = const()[name = string("op_838_transpose_x_1"), val = bool(false)]; bool var_838_transpose_y_1 = const()[name = string("op_838_transpose_y_1"), val = bool(true)]; tensor var_838_cast_fp16 = matmul(transpose_x = var_838_transpose_x_1, transpose_y = var_838_transpose_y_1, x = q_7_cast_fp16, y = k_exp_15_cast_fp16)[name = string("op_838_cast_fp16")]; fp16 var_839_to_fp16 = const()[name = string("op_839_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_13_cast_fp16 = mul(x = var_838_cast_fp16, y = var_839_to_fp16)[name = string("attn_13_cast_fp16")]; tensor input_31_cast_fp16 = add(x = attn_13_cast_fp16, y = attention_mask_to_fp16)[name = string("input_31_cast_fp16")]; tensor attn_15_cast_fp16 = softmax(axis = var_704, x = input_31_cast_fp16)[name = string("attn_15_cast_fp16")]; bool out_7_transpose_x_0 = const()[name = string("out_7_transpose_x_0"), val = bool(false)]; bool out_7_transpose_y_0 = const()[name = string("out_7_transpose_y_0"), val = bool(false)]; tensor out_7_cast_fp16 = matmul(transpose_x = out_7_transpose_x_0, transpose_y = out_7_transpose_y_0, x = attn_15_cast_fp16, y = v_exp_15_cast_fp16)[name = string("out_7_cast_fp16")]; tensor var_844_perm_0 = const()[name = string("op_844_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_845 = const()[name = string("op_845"), val = tensor([1, 1, -1])]; tensor var_844_cast_fp16 = transpose(perm = var_844_perm_0, x = out_7_cast_fp16)[name = string("transpose_96")]; tensor input_33_cast_fp16 = reshape(shape = var_845, x = var_844_cast_fp16)[name = string("input_33_cast_fp16")]; tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53522880))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_77_cast_fp16, y = linear_24_cast_fp16)[name = string("x_97_cast_fp16")]; fp16 var_703_promoted_3_to_fp16 = const()[name = string("op_703_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_852_cast_fp16 = pow(x = x_97_cast_fp16, y = var_703_promoted_3_to_fp16)[name = string("op_852_cast_fp16")]; tensor var_854_axes_0 = const()[name = string("op_854_axes_0"), val = tensor([-1])]; bool var_854_keep_dims_0 = const()[name = string("op_854_keep_dims_0"), val = bool(true)]; tensor var_854_cast_fp16 = reduce_mean(axes = var_854_axes_0, keep_dims = var_854_keep_dims_0, x = var_852_cast_fp16)[name = string("op_854_cast_fp16")]; fp16 var_855_to_fp16 = const()[name = string("op_855_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_856_cast_fp16 = add(x = var_854_cast_fp16, y = var_855_to_fp16)[name = string("op_856_cast_fp16")]; fp32 norm_31_epsilon_0 = const()[name = string("norm_31_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_31_cast_fp16 = rsqrt(epsilon = norm_31_epsilon_0, x = var_856_cast_fp16)[name = string("norm_31_cast_fp16")]; tensor var_858_cast_fp16 = mul(x = x_97_cast_fp16, y = norm_31_cast_fp16)[name = string("op_858_cast_fp16")]; tensor layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53523456)))]; tensor var_859_cast_fp16 = mul(x = var_858_cast_fp16, y = layers_3_post_attention_layernorm_weight_to_fp16)[name = string("op_859_cast_fp16")]; tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53525568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671360))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = var_859_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_869_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_869_cast_fp16")]; tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59817728))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = var_859_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_39_cast_fp16 = mul(x = var_869_cast_fp16, y = linear_26_cast_fp16)[name = string("input_39_cast_fp16")]; tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59818304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964096))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_27_cast_fp16)[name = string("x_103_cast_fp16")]; int32 var_889 = const()[name = string("op_889"), val = int32(-1)]; fp16 var_888_promoted_to_fp16 = const()[name = string("op_888_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_898_cast_fp16 = pow(x = x_103_cast_fp16, y = var_888_promoted_to_fp16)[name = string("op_898_cast_fp16")]; tensor var_900_axes_0 = const()[name = string("op_900_axes_0"), val = tensor([-1])]; bool var_900_keep_dims_0 = const()[name = string("op_900_keep_dims_0"), val = bool(true)]; tensor var_900_cast_fp16 = reduce_mean(axes = var_900_axes_0, keep_dims = var_900_keep_dims_0, x = var_898_cast_fp16)[name = string("op_900_cast_fp16")]; fp16 var_901_to_fp16 = const()[name = string("op_901_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_902_cast_fp16 = add(x = var_900_cast_fp16, y = var_901_to_fp16)[name = string("op_902_cast_fp16")]; fp32 norm_33_epsilon_0 = const()[name = string("norm_33_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_33_cast_fp16 = rsqrt(epsilon = norm_33_epsilon_0, x = var_902_cast_fp16)[name = string("norm_33_cast_fp16")]; tensor var_904_cast_fp16 = mul(x = x_103_cast_fp16, y = norm_33_cast_fp16)[name = string("op_904_cast_fp16")]; tensor layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964672)))]; tensor var_905_cast_fp16 = mul(x = var_904_cast_fp16, y = layers_4_input_layernorm_weight_to_fp16)[name = string("op_905_cast_fp16")]; tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064000))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = var_905_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor var_921 = const()[name = string("op_921"), val = tensor([1, 1, 16, 128])]; tensor var_922_cast_fp16 = reshape(shape = var_921, x = linear_28_cast_fp16)[name = string("op_922_cast_fp16")]; tensor x_109_perm_0 = const()[name = string("x_109_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113216))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = var_905_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor var_926 = const()[name = string("op_926"), val = tensor([1, 1, 8, 128])]; tensor var_927_cast_fp16 = reshape(shape = var_926, x = linear_29_cast_fp16)[name = string("op_927_cast_fp16")]; tensor x_113_perm_0 = const()[name = string("x_113_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67162432))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = var_905_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor var_931 = const()[name = string("op_931"), val = tensor([1, 1, 8, 128])]; tensor var_932_cast_fp16 = reshape(shape = var_931, x = linear_30_cast_fp16)[name = string("op_932_cast_fp16")]; tensor v_9_perm_0 = const()[name = string("v_9_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_888_promoted_1_to_fp16 = const()[name = string("op_888_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_109_cast_fp16 = transpose(perm = x_109_perm_0, x = var_922_cast_fp16)[name = string("transpose_95")]; tensor var_936_cast_fp16 = pow(x = x_109_cast_fp16, y = var_888_promoted_1_to_fp16)[name = string("op_936_cast_fp16")]; tensor var_938_axes_0 = const()[name = string("op_938_axes_0"), val = tensor([-1])]; bool var_938_keep_dims_0 = const()[name = string("op_938_keep_dims_0"), val = bool(true)]; tensor var_938_cast_fp16 = reduce_mean(axes = var_938_axes_0, keep_dims = var_938_keep_dims_0, x = var_936_cast_fp16)[name = string("op_938_cast_fp16")]; fp16 var_939_to_fp16 = const()[name = string("op_939_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_940_cast_fp16 = add(x = var_938_cast_fp16, y = var_939_to_fp16)[name = string("op_940_cast_fp16")]; fp32 norm_35_epsilon_0 = const()[name = string("norm_35_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_35_cast_fp16 = rsqrt(epsilon = norm_35_epsilon_0, x = var_940_cast_fp16)[name = string("norm_35_cast_fp16")]; tensor var_942_cast_fp16 = mul(x = x_109_cast_fp16, y = norm_35_cast_fp16)[name = string("op_942_cast_fp16")]; tensor layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163008)))]; tensor var_943_cast_fp16 = mul(x = var_942_cast_fp16, y = layers_4_self_attn_q_norm_weight_to_fp16)[name = string("op_943_cast_fp16")]; fp16 var_888_promoted_2_to_fp16 = const()[name = string("op_888_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_113_cast_fp16 = transpose(perm = x_113_perm_0, x = var_927_cast_fp16)[name = string("transpose_94")]; tensor var_947_cast_fp16 = pow(x = x_113_cast_fp16, y = var_888_promoted_2_to_fp16)[name = string("op_947_cast_fp16")]; tensor var_949_axes_0 = const()[name = string("op_949_axes_0"), val = tensor([-1])]; bool var_949_keep_dims_0 = const()[name = string("op_949_keep_dims_0"), val = bool(true)]; tensor var_949_cast_fp16 = reduce_mean(axes = var_949_axes_0, keep_dims = var_949_keep_dims_0, x = var_947_cast_fp16)[name = string("op_949_cast_fp16")]; fp16 var_950_to_fp16 = const()[name = string("op_950_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_951_cast_fp16 = add(x = var_949_cast_fp16, y = var_950_to_fp16)[name = string("op_951_cast_fp16")]; fp32 norm_37_epsilon_0 = const()[name = string("norm_37_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_37_cast_fp16 = rsqrt(epsilon = norm_37_epsilon_0, x = var_951_cast_fp16)[name = string("norm_37_cast_fp16")]; tensor var_953_cast_fp16 = mul(x = x_113_cast_fp16, y = norm_37_cast_fp16)[name = string("op_953_cast_fp16")]; tensor layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163328)))]; tensor var_954_cast_fp16 = mul(x = var_953_cast_fp16, y = layers_4_self_attn_k_norm_weight_to_fp16)[name = string("op_954_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_943_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_943_cast_fp16)[name = string("x2_17_cast_fp16")]; tensor var_972_cast_fp16 = mul(x = x1_17_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_972_cast_fp16")]; tensor var_973_cast_fp16 = mul(x = x2_17_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_973_cast_fp16")]; tensor var_974_cast_fp16 = sub(x = var_972_cast_fp16, y = var_973_cast_fp16)[name = string("op_974_cast_fp16")]; tensor var_975_cast_fp16 = mul(x = x2_17_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_975_cast_fp16")]; tensor var_976_cast_fp16 = mul(x = x1_17_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_976_cast_fp16")]; tensor var_977_cast_fp16 = add(x = var_975_cast_fp16, y = var_976_cast_fp16)[name = string("op_977_cast_fp16")]; bool q_9_interleave_0 = const()[name = string("q_9_interleave_0"), val = bool(false)]; tensor q_9_cast_fp16 = concat(axis = var_889, interleave = q_9_interleave_0, values = (var_974_cast_fp16, var_977_cast_fp16))[name = string("q_9_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_954_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_954_cast_fp16)[name = string("x2_19_cast_fp16")]; tensor var_996_cast_fp16 = mul(x = x1_19_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_996_cast_fp16")]; tensor var_997_cast_fp16 = mul(x = x2_19_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_997_cast_fp16")]; tensor var_998_cast_fp16 = sub(x = var_996_cast_fp16, y = var_997_cast_fp16)[name = string("op_998_cast_fp16")]; tensor var_999_cast_fp16 = mul(x = x2_19_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_999_cast_fp16")]; tensor var_1000_cast_fp16 = mul(x = x1_19_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1000_cast_fp16")]; tensor var_1001_cast_fp16 = add(x = var_999_cast_fp16, y = var_1000_cast_fp16)[name = string("op_1001_cast_fp16")]; bool k_9_interleave_0 = const()[name = string("k_9_interleave_0"), val = bool(false)]; tensor k_9_cast_fp16 = concat(axis = var_889, interleave = k_9_interleave_0, values = (var_998_cast_fp16, var_1001_cast_fp16))[name = string("k_9_cast_fp16")]; tensor read_state_8 = read_state(input = k_cache_4)[name = string("read_state_8")]; tensor k_cache_27_cast_fp16 = mul(x = read_state_8, y = var_264_cast_fp16)[name = string("k_cache_27_cast_fp16")]; write_state(data = k_cache_27_cast_fp16, input = k_cache_4)[name = string("coreml_update_state_128_write_state")]; tensor coreml_update_state_128 = read_state(input = k_cache_4)[name = string("coreml_update_state_128")]; tensor var_1006_cast_fp16 = mul(x = k_9_cast_fp16, y = onehot_cast_fp16)[name = string("op_1006_cast_fp16")]; tensor k_cache_29_cast_fp16 = add(x = coreml_update_state_128, y = var_1006_cast_fp16)[name = string("k_cache_29_cast_fp16")]; write_state(data = k_cache_29_cast_fp16, input = k_cache_4)[name = string("coreml_update_state_129_write_state")]; tensor coreml_update_state_129 = read_state(input = k_cache_4)[name = string("coreml_update_state_129")]; tensor read_state_9 = read_state(input = v_cache_4)[name = string("read_state_9")]; tensor v_cache_27_cast_fp16 = mul(x = read_state_9, y = var_264_cast_fp16)[name = string("v_cache_27_cast_fp16")]; write_state(data = v_cache_27_cast_fp16, input = v_cache_4)[name = string("coreml_update_state_130_write_state")]; tensor coreml_update_state_130 = read_state(input = v_cache_4)[name = string("coreml_update_state_130")]; tensor v_9_cast_fp16 = transpose(perm = v_9_perm_0, x = var_932_cast_fp16)[name = string("transpose_93")]; tensor var_1010_cast_fp16 = mul(x = v_9_cast_fp16, y = onehot_cast_fp16)[name = string("op_1010_cast_fp16")]; tensor v_cache_29_cast_fp16 = add(x = coreml_update_state_130, y = var_1010_cast_fp16)[name = string("v_cache_29_cast_fp16")]; write_state(data = v_cache_29_cast_fp16, input = v_cache_4)[name = string("coreml_update_state_131_write_state")]; tensor coreml_update_state_131 = read_state(input = v_cache_4)[name = string("coreml_update_state_131")]; tensor var_1012_axes_0 = const()[name = string("op_1012_axes_0"), val = tensor([2])]; tensor var_1012_cast_fp16 = expand_dims(axes = var_1012_axes_0, x = coreml_update_state_129)[name = string("op_1012_cast_fp16")]; tensor k_exp_17_reps_0 = const()[name = string("k_exp_17_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_17_cast_fp16 = tile(reps = k_exp_17_reps_0, x = var_1012_cast_fp16)[name = string("k_exp_17_cast_fp16")]; tensor var_1015 = const()[name = string("op_1015"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_19_cast_fp16 = reshape(shape = var_1015, x = k_exp_17_cast_fp16)[name = string("k_exp_19_cast_fp16")]; tensor var_1017_axes_0 = const()[name = string("op_1017_axes_0"), val = tensor([2])]; tensor var_1017_cast_fp16 = expand_dims(axes = var_1017_axes_0, x = coreml_update_state_131)[name = string("op_1017_cast_fp16")]; tensor v_exp_17_reps_0 = const()[name = string("v_exp_17_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_17_cast_fp16 = tile(reps = v_exp_17_reps_0, x = var_1017_cast_fp16)[name = string("v_exp_17_cast_fp16")]; tensor var_1020 = const()[name = string("op_1020"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_19_cast_fp16 = reshape(shape = var_1020, x = v_exp_17_cast_fp16)[name = string("v_exp_19_cast_fp16")]; bool var_1023_transpose_x_1 = const()[name = string("op_1023_transpose_x_1"), val = bool(false)]; bool var_1023_transpose_y_1 = const()[name = string("op_1023_transpose_y_1"), val = bool(true)]; tensor var_1023_cast_fp16 = matmul(transpose_x = var_1023_transpose_x_1, transpose_y = var_1023_transpose_y_1, x = q_9_cast_fp16, y = k_exp_19_cast_fp16)[name = string("op_1023_cast_fp16")]; fp16 var_1024_to_fp16 = const()[name = string("op_1024_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_17_cast_fp16 = mul(x = var_1023_cast_fp16, y = var_1024_to_fp16)[name = string("attn_17_cast_fp16")]; tensor input_41_cast_fp16 = add(x = attn_17_cast_fp16, y = attention_mask_to_fp16)[name = string("input_41_cast_fp16")]; tensor attn_19_cast_fp16 = softmax(axis = var_889, x = input_41_cast_fp16)[name = string("attn_19_cast_fp16")]; bool out_9_transpose_x_0 = const()[name = string("out_9_transpose_x_0"), val = bool(false)]; bool out_9_transpose_y_0 = const()[name = string("out_9_transpose_y_0"), val = bool(false)]; tensor out_9_cast_fp16 = matmul(transpose_x = out_9_transpose_x_0, transpose_y = out_9_transpose_y_0, x = attn_19_cast_fp16, y = v_exp_19_cast_fp16)[name = string("out_9_cast_fp16")]; tensor var_1029_perm_0 = const()[name = string("op_1029_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1030 = const()[name = string("op_1030"), val = tensor([1, 1, -1])]; tensor var_1029_cast_fp16 = transpose(perm = var_1029_perm_0, x = out_9_cast_fp16)[name = string("transpose_92")]; tensor input_43_cast_fp16 = reshape(shape = var_1030, x = var_1029_cast_fp16)[name = string("input_43_cast_fp16")]; tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69260864))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_103_cast_fp16, y = linear_31_cast_fp16)[name = string("x_123_cast_fp16")]; fp16 var_888_promoted_3_to_fp16 = const()[name = string("op_888_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1037_cast_fp16 = pow(x = x_123_cast_fp16, y = var_888_promoted_3_to_fp16)[name = string("op_1037_cast_fp16")]; tensor var_1039_axes_0 = const()[name = string("op_1039_axes_0"), val = tensor([-1])]; bool var_1039_keep_dims_0 = const()[name = string("op_1039_keep_dims_0"), val = bool(true)]; tensor var_1039_cast_fp16 = reduce_mean(axes = var_1039_axes_0, keep_dims = var_1039_keep_dims_0, x = var_1037_cast_fp16)[name = string("op_1039_cast_fp16")]; fp16 var_1040_to_fp16 = const()[name = string("op_1040_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1041_cast_fp16 = add(x = var_1039_cast_fp16, y = var_1040_to_fp16)[name = string("op_1041_cast_fp16")]; fp32 norm_39_epsilon_0 = const()[name = string("norm_39_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_39_cast_fp16 = rsqrt(epsilon = norm_39_epsilon_0, x = var_1041_cast_fp16)[name = string("norm_39_cast_fp16")]; tensor var_1043_cast_fp16 = mul(x = x_123_cast_fp16, y = norm_39_cast_fp16)[name = string("op_1043_cast_fp16")]; tensor layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69261440)))]; tensor var_1044_cast_fp16 = mul(x = var_1043_cast_fp16, y = layers_4_post_attention_layernorm_weight_to_fp16)[name = string("op_1044_cast_fp16")]; tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69263552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409344))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = var_1044_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1054_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1054_cast_fp16")]; tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75555712))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = var_1044_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_49_cast_fp16 = mul(x = var_1054_cast_fp16, y = linear_33_cast_fp16)[name = string("input_49_cast_fp16")]; tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702080))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_34_cast_fp16)[name = string("x_129_cast_fp16")]; int32 var_1074 = const()[name = string("op_1074"), val = int32(-1)]; fp16 var_1073_promoted_to_fp16 = const()[name = string("op_1073_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1083_cast_fp16 = pow(x = x_129_cast_fp16, y = var_1073_promoted_to_fp16)[name = string("op_1083_cast_fp16")]; tensor var_1085_axes_0 = const()[name = string("op_1085_axes_0"), val = tensor([-1])]; bool var_1085_keep_dims_0 = const()[name = string("op_1085_keep_dims_0"), val = bool(true)]; tensor var_1085_cast_fp16 = reduce_mean(axes = var_1085_axes_0, keep_dims = var_1085_keep_dims_0, x = var_1083_cast_fp16)[name = string("op_1085_cast_fp16")]; fp16 var_1086_to_fp16 = const()[name = string("op_1086_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1087_cast_fp16 = add(x = var_1085_cast_fp16, y = var_1086_to_fp16)[name = string("op_1087_cast_fp16")]; fp32 norm_41_epsilon_0 = const()[name = string("norm_41_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_41_cast_fp16 = rsqrt(epsilon = norm_41_epsilon_0, x = var_1087_cast_fp16)[name = string("norm_41_cast_fp16")]; tensor var_1089_cast_fp16 = mul(x = x_129_cast_fp16, y = norm_41_cast_fp16)[name = string("op_1089_cast_fp16")]; tensor layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702656)))]; tensor var_1090_cast_fp16 = mul(x = var_1089_cast_fp16, y = layers_5_input_layernorm_weight_to_fp16)[name = string("op_1090_cast_fp16")]; tensor layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80801984))))[name = string("layers_5_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = var_1090_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor var_1106 = const()[name = string("op_1106"), val = tensor([1, 1, 16, 128])]; tensor var_1107_cast_fp16 = reshape(shape = var_1106, x = linear_35_cast_fp16)[name = string("op_1107_cast_fp16")]; tensor x_135_perm_0 = const()[name = string("x_135_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80802560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851200))))[name = string("layers_5_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = var_1090_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor var_1111 = const()[name = string("op_1111"), val = tensor([1, 1, 8, 128])]; tensor var_1112_cast_fp16 = reshape(shape = var_1111, x = linear_36_cast_fp16)[name = string("op_1112_cast_fp16")]; tensor x_139_perm_0 = const()[name = string("x_139_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900416))))[name = string("layers_5_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = var_1090_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor var_1116 = const()[name = string("op_1116"), val = tensor([1, 1, 8, 128])]; tensor var_1117_cast_fp16 = reshape(shape = var_1116, x = linear_37_cast_fp16)[name = string("op_1117_cast_fp16")]; tensor v_11_perm_0 = const()[name = string("v_11_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_1073_promoted_1_to_fp16 = const()[name = string("op_1073_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_135_cast_fp16 = transpose(perm = x_135_perm_0, x = var_1107_cast_fp16)[name = string("transpose_91")]; tensor var_1121_cast_fp16 = pow(x = x_135_cast_fp16, y = var_1073_promoted_1_to_fp16)[name = string("op_1121_cast_fp16")]; tensor var_1123_axes_0 = const()[name = string("op_1123_axes_0"), val = tensor([-1])]; bool var_1123_keep_dims_0 = const()[name = string("op_1123_keep_dims_0"), val = bool(true)]; tensor var_1123_cast_fp16 = reduce_mean(axes = var_1123_axes_0, keep_dims = var_1123_keep_dims_0, x = var_1121_cast_fp16)[name = string("op_1123_cast_fp16")]; fp16 var_1124_to_fp16 = const()[name = string("op_1124_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1125_cast_fp16 = add(x = var_1123_cast_fp16, y = var_1124_to_fp16)[name = string("op_1125_cast_fp16")]; fp32 norm_43_epsilon_0 = const()[name = string("norm_43_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_43_cast_fp16 = rsqrt(epsilon = norm_43_epsilon_0, x = var_1125_cast_fp16)[name = string("norm_43_cast_fp16")]; tensor var_1127_cast_fp16 = mul(x = x_135_cast_fp16, y = norm_43_cast_fp16)[name = string("op_1127_cast_fp16")]; tensor layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900992)))]; tensor var_1128_cast_fp16 = mul(x = var_1127_cast_fp16, y = layers_5_self_attn_q_norm_weight_to_fp16)[name = string("op_1128_cast_fp16")]; fp16 var_1073_promoted_2_to_fp16 = const()[name = string("op_1073_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_139_cast_fp16 = transpose(perm = x_139_perm_0, x = var_1112_cast_fp16)[name = string("transpose_90")]; tensor var_1132_cast_fp16 = pow(x = x_139_cast_fp16, y = var_1073_promoted_2_to_fp16)[name = string("op_1132_cast_fp16")]; tensor var_1134_axes_0 = const()[name = string("op_1134_axes_0"), val = tensor([-1])]; bool var_1134_keep_dims_0 = const()[name = string("op_1134_keep_dims_0"), val = bool(true)]; tensor var_1134_cast_fp16 = reduce_mean(axes = var_1134_axes_0, keep_dims = var_1134_keep_dims_0, x = var_1132_cast_fp16)[name = string("op_1134_cast_fp16")]; fp16 var_1135_to_fp16 = const()[name = string("op_1135_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1136_cast_fp16 = add(x = var_1134_cast_fp16, y = var_1135_to_fp16)[name = string("op_1136_cast_fp16")]; fp32 norm_45_epsilon_0 = const()[name = string("norm_45_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_45_cast_fp16 = rsqrt(epsilon = norm_45_epsilon_0, x = var_1136_cast_fp16)[name = string("norm_45_cast_fp16")]; tensor var_1138_cast_fp16 = mul(x = x_139_cast_fp16, y = norm_45_cast_fp16)[name = string("op_1138_cast_fp16")]; tensor layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901312)))]; tensor var_1139_cast_fp16 = mul(x = var_1138_cast_fp16, y = layers_5_self_attn_k_norm_weight_to_fp16)[name = string("op_1139_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1128_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1128_cast_fp16)[name = string("x2_21_cast_fp16")]; tensor var_1157_cast_fp16 = mul(x = x1_21_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1157_cast_fp16")]; tensor var_1158_cast_fp16 = mul(x = x2_21_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1158_cast_fp16")]; tensor var_1159_cast_fp16 = sub(x = var_1157_cast_fp16, y = var_1158_cast_fp16)[name = string("op_1159_cast_fp16")]; tensor var_1160_cast_fp16 = mul(x = x2_21_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1160_cast_fp16")]; tensor var_1161_cast_fp16 = mul(x = x1_21_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1161_cast_fp16")]; tensor var_1162_cast_fp16 = add(x = var_1160_cast_fp16, y = var_1161_cast_fp16)[name = string("op_1162_cast_fp16")]; bool q_11_interleave_0 = const()[name = string("q_11_interleave_0"), val = bool(false)]; tensor q_11_cast_fp16 = concat(axis = var_1074, interleave = q_11_interleave_0, values = (var_1159_cast_fp16, var_1162_cast_fp16))[name = string("q_11_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1139_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1139_cast_fp16)[name = string("x2_23_cast_fp16")]; tensor var_1181_cast_fp16 = mul(x = x1_23_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1181_cast_fp16")]; tensor var_1182_cast_fp16 = mul(x = x2_23_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1182_cast_fp16")]; tensor var_1183_cast_fp16 = sub(x = var_1181_cast_fp16, y = var_1182_cast_fp16)[name = string("op_1183_cast_fp16")]; tensor var_1184_cast_fp16 = mul(x = x2_23_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1184_cast_fp16")]; tensor var_1185_cast_fp16 = mul(x = x1_23_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1185_cast_fp16")]; tensor var_1186_cast_fp16 = add(x = var_1184_cast_fp16, y = var_1185_cast_fp16)[name = string("op_1186_cast_fp16")]; bool k_11_interleave_0 = const()[name = string("k_11_interleave_0"), val = bool(false)]; tensor k_11_cast_fp16 = concat(axis = var_1074, interleave = k_11_interleave_0, values = (var_1183_cast_fp16, var_1186_cast_fp16))[name = string("k_11_cast_fp16")]; tensor read_state_10 = read_state(input = k_cache_5)[name = string("read_state_10")]; tensor k_cache_33_cast_fp16 = mul(x = read_state_10, y = var_264_cast_fp16)[name = string("k_cache_33_cast_fp16")]; write_state(data = k_cache_33_cast_fp16, input = k_cache_5)[name = string("coreml_update_state_132_write_state")]; tensor coreml_update_state_132 = read_state(input = k_cache_5)[name = string("coreml_update_state_132")]; tensor var_1191_cast_fp16 = mul(x = k_11_cast_fp16, y = onehot_cast_fp16)[name = string("op_1191_cast_fp16")]; tensor k_cache_35_cast_fp16 = add(x = coreml_update_state_132, y = var_1191_cast_fp16)[name = string("k_cache_35_cast_fp16")]; write_state(data = k_cache_35_cast_fp16, input = k_cache_5)[name = string("coreml_update_state_133_write_state")]; tensor coreml_update_state_133 = read_state(input = k_cache_5)[name = string("coreml_update_state_133")]; tensor read_state_11 = read_state(input = v_cache_5)[name = string("read_state_11")]; tensor v_cache_33_cast_fp16 = mul(x = read_state_11, y = var_264_cast_fp16)[name = string("v_cache_33_cast_fp16")]; write_state(data = v_cache_33_cast_fp16, input = v_cache_5)[name = string("coreml_update_state_134_write_state")]; tensor coreml_update_state_134 = read_state(input = v_cache_5)[name = string("coreml_update_state_134")]; tensor v_11_cast_fp16 = transpose(perm = v_11_perm_0, x = var_1117_cast_fp16)[name = string("transpose_89")]; tensor var_1195_cast_fp16 = mul(x = v_11_cast_fp16, y = onehot_cast_fp16)[name = string("op_1195_cast_fp16")]; tensor v_cache_35_cast_fp16 = add(x = coreml_update_state_134, y = var_1195_cast_fp16)[name = string("v_cache_35_cast_fp16")]; write_state(data = v_cache_35_cast_fp16, input = v_cache_5)[name = string("coreml_update_state_135_write_state")]; tensor coreml_update_state_135 = read_state(input = v_cache_5)[name = string("coreml_update_state_135")]; tensor var_1197_axes_0 = const()[name = string("op_1197_axes_0"), val = tensor([2])]; tensor var_1197_cast_fp16 = expand_dims(axes = var_1197_axes_0, x = coreml_update_state_133)[name = string("op_1197_cast_fp16")]; tensor k_exp_21_reps_0 = const()[name = string("k_exp_21_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_21_cast_fp16 = tile(reps = k_exp_21_reps_0, x = var_1197_cast_fp16)[name = string("k_exp_21_cast_fp16")]; tensor var_1200 = const()[name = string("op_1200"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_23_cast_fp16 = reshape(shape = var_1200, x = k_exp_21_cast_fp16)[name = string("k_exp_23_cast_fp16")]; tensor var_1202_axes_0 = const()[name = string("op_1202_axes_0"), val = tensor([2])]; tensor var_1202_cast_fp16 = expand_dims(axes = var_1202_axes_0, x = coreml_update_state_135)[name = string("op_1202_cast_fp16")]; tensor v_exp_21_reps_0 = const()[name = string("v_exp_21_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_21_cast_fp16 = tile(reps = v_exp_21_reps_0, x = var_1202_cast_fp16)[name = string("v_exp_21_cast_fp16")]; tensor var_1205 = const()[name = string("op_1205"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_23_cast_fp16 = reshape(shape = var_1205, x = v_exp_21_cast_fp16)[name = string("v_exp_23_cast_fp16")]; bool var_1208_transpose_x_1 = const()[name = string("op_1208_transpose_x_1"), val = bool(false)]; bool var_1208_transpose_y_1 = const()[name = string("op_1208_transpose_y_1"), val = bool(true)]; tensor var_1208_cast_fp16 = matmul(transpose_x = var_1208_transpose_x_1, transpose_y = var_1208_transpose_y_1, x = q_11_cast_fp16, y = k_exp_23_cast_fp16)[name = string("op_1208_cast_fp16")]; fp16 var_1209_to_fp16 = const()[name = string("op_1209_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_21_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = string("attn_21_cast_fp16")]; tensor input_51_cast_fp16 = add(x = attn_21_cast_fp16, y = attention_mask_to_fp16)[name = string("input_51_cast_fp16")]; tensor attn_23_cast_fp16 = softmax(axis = var_1074, x = input_51_cast_fp16)[name = string("attn_23_cast_fp16")]; bool out_11_transpose_x_0 = const()[name = string("out_11_transpose_x_0"), val = bool(false)]; bool out_11_transpose_y_0 = const()[name = string("out_11_transpose_y_0"), val = bool(false)]; tensor out_11_cast_fp16 = matmul(transpose_x = out_11_transpose_x_0, transpose_y = out_11_transpose_y_0, x = attn_23_cast_fp16, y = v_exp_23_cast_fp16)[name = string("out_11_cast_fp16")]; tensor var_1214_perm_0 = const()[name = string("op_1214_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1215 = const()[name = string("op_1215"), val = tensor([1, 1, -1])]; tensor var_1214_cast_fp16 = transpose(perm = var_1214_perm_0, x = out_11_cast_fp16)[name = string("transpose_88")]; tensor input_53_cast_fp16 = reshape(shape = var_1215, x = var_1214_cast_fp16)[name = string("input_53_cast_fp16")]; tensor layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84998848))))[name = string("layers_5_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor x_149_cast_fp16 = add(x = x_129_cast_fp16, y = linear_38_cast_fp16)[name = string("x_149_cast_fp16")]; fp16 var_1073_promoted_3_to_fp16 = const()[name = string("op_1073_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1222_cast_fp16 = pow(x = x_149_cast_fp16, y = var_1073_promoted_3_to_fp16)[name = string("op_1222_cast_fp16")]; tensor var_1224_axes_0 = const()[name = string("op_1224_axes_0"), val = tensor([-1])]; bool var_1224_keep_dims_0 = const()[name = string("op_1224_keep_dims_0"), val = bool(true)]; tensor var_1224_cast_fp16 = reduce_mean(axes = var_1224_axes_0, keep_dims = var_1224_keep_dims_0, x = var_1222_cast_fp16)[name = string("op_1224_cast_fp16")]; fp16 var_1225_to_fp16 = const()[name = string("op_1225_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1226_cast_fp16 = add(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = string("op_1226_cast_fp16")]; fp32 norm_47_epsilon_0 = const()[name = string("norm_47_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_47_cast_fp16 = rsqrt(epsilon = norm_47_epsilon_0, x = var_1226_cast_fp16)[name = string("norm_47_cast_fp16")]; tensor var_1228_cast_fp16 = mul(x = x_149_cast_fp16, y = norm_47_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84999424)))]; tensor var_1229_cast_fp16 = mul(x = var_1228_cast_fp16, y = layers_5_post_attention_layernorm_weight_to_fp16)[name = string("op_1229_cast_fp16")]; tensor layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85001536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147328))))[name = string("layers_5_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = var_1229_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_1239_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1239_cast_fp16")]; tensor layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91293696))))[name = string("layers_5_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_up_proj_weight_to_fp16_palettized, x = var_1229_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_59_cast_fp16 = mul(x = var_1239_cast_fp16, y = linear_40_cast_fp16)[name = string("input_59_cast_fp16")]; tensor layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91294272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440064))))[name = string("layers_5_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor x_155_cast_fp16 = add(x = x_149_cast_fp16, y = linear_41_cast_fp16)[name = string("x_155_cast_fp16")]; int32 var_1259 = const()[name = string("op_1259"), val = int32(-1)]; fp16 var_1258_promoted_to_fp16 = const()[name = string("op_1258_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1268_cast_fp16 = pow(x = x_155_cast_fp16, y = var_1258_promoted_to_fp16)[name = string("op_1268_cast_fp16")]; tensor var_1270_axes_0 = const()[name = string("op_1270_axes_0"), val = tensor([-1])]; bool var_1270_keep_dims_0 = const()[name = string("op_1270_keep_dims_0"), val = bool(true)]; tensor var_1270_cast_fp16 = reduce_mean(axes = var_1270_axes_0, keep_dims = var_1270_keep_dims_0, x = var_1268_cast_fp16)[name = string("op_1270_cast_fp16")]; fp16 var_1271_to_fp16 = const()[name = string("op_1271_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1272_cast_fp16 = add(x = var_1270_cast_fp16, y = var_1271_to_fp16)[name = string("op_1272_cast_fp16")]; fp32 norm_49_epsilon_0 = const()[name = string("norm_49_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_49_cast_fp16 = rsqrt(epsilon = norm_49_epsilon_0, x = var_1272_cast_fp16)[name = string("norm_49_cast_fp16")]; tensor var_1274_cast_fp16 = mul(x = x_155_cast_fp16, y = norm_49_cast_fp16)[name = string("op_1274_cast_fp16")]; tensor layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440640)))]; tensor var_1275_cast_fp16 = mul(x = var_1274_cast_fp16, y = layers_6_input_layernorm_weight_to_fp16)[name = string("op_1275_cast_fp16")]; tensor layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94442752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96539968))))[name = string("layers_6_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = var_1275_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_1291 = const()[name = string("op_1291"), val = tensor([1, 1, 16, 128])]; tensor var_1292_cast_fp16 = reshape(shape = var_1291, x = linear_42_cast_fp16)[name = string("op_1292_cast_fp16")]; tensor x_161_perm_0 = const()[name = string("x_161_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96540544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589184))))[name = string("layers_6_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = var_1275_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor var_1296 = const()[name = string("op_1296"), val = tensor([1, 1, 8, 128])]; tensor var_1297_cast_fp16 = reshape(shape = var_1296, x = linear_43_cast_fp16)[name = string("op_1297_cast_fp16")]; tensor x_165_perm_0 = const()[name = string("x_165_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638400))))[name = string("layers_6_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = var_1275_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor var_1301 = const()[name = string("op_1301"), val = tensor([1, 1, 8, 128])]; tensor var_1302_cast_fp16 = reshape(shape = var_1301, x = linear_44_cast_fp16)[name = string("op_1302_cast_fp16")]; tensor v_13_perm_0 = const()[name = string("v_13_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_1258_promoted_1_to_fp16 = const()[name = string("op_1258_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_161_cast_fp16 = transpose(perm = x_161_perm_0, x = var_1292_cast_fp16)[name = string("transpose_87")]; tensor var_1306_cast_fp16 = pow(x = x_161_cast_fp16, y = var_1258_promoted_1_to_fp16)[name = string("op_1306_cast_fp16")]; tensor var_1308_axes_0 = const()[name = string("op_1308_axes_0"), val = tensor([-1])]; bool var_1308_keep_dims_0 = const()[name = string("op_1308_keep_dims_0"), val = bool(true)]; tensor var_1308_cast_fp16 = reduce_mean(axes = var_1308_axes_0, keep_dims = var_1308_keep_dims_0, x = var_1306_cast_fp16)[name = string("op_1308_cast_fp16")]; fp16 var_1309_to_fp16 = const()[name = string("op_1309_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1310_cast_fp16 = add(x = var_1308_cast_fp16, y = var_1309_to_fp16)[name = string("op_1310_cast_fp16")]; fp32 norm_51_epsilon_0 = const()[name = string("norm_51_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_51_cast_fp16 = rsqrt(epsilon = norm_51_epsilon_0, x = var_1310_cast_fp16)[name = string("norm_51_cast_fp16")]; tensor var_1312_cast_fp16 = mul(x = x_161_cast_fp16, y = norm_51_cast_fp16)[name = string("op_1312_cast_fp16")]; tensor layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638976)))]; tensor var_1313_cast_fp16 = mul(x = var_1312_cast_fp16, y = layers_6_self_attn_q_norm_weight_to_fp16)[name = string("op_1313_cast_fp16")]; fp16 var_1258_promoted_2_to_fp16 = const()[name = string("op_1258_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_165_cast_fp16 = transpose(perm = x_165_perm_0, x = var_1297_cast_fp16)[name = string("transpose_86")]; tensor var_1317_cast_fp16 = pow(x = x_165_cast_fp16, y = var_1258_promoted_2_to_fp16)[name = string("op_1317_cast_fp16")]; tensor var_1319_axes_0 = const()[name = string("op_1319_axes_0"), val = tensor([-1])]; bool var_1319_keep_dims_0 = const()[name = string("op_1319_keep_dims_0"), val = bool(true)]; tensor var_1319_cast_fp16 = reduce_mean(axes = var_1319_axes_0, keep_dims = var_1319_keep_dims_0, x = var_1317_cast_fp16)[name = string("op_1319_cast_fp16")]; fp16 var_1320_to_fp16 = const()[name = string("op_1320_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1321_cast_fp16 = add(x = var_1319_cast_fp16, y = var_1320_to_fp16)[name = string("op_1321_cast_fp16")]; fp32 norm_53_epsilon_0 = const()[name = string("norm_53_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_53_cast_fp16 = rsqrt(epsilon = norm_53_epsilon_0, x = var_1321_cast_fp16)[name = string("norm_53_cast_fp16")]; tensor var_1323_cast_fp16 = mul(x = x_165_cast_fp16, y = norm_53_cast_fp16)[name = string("op_1323_cast_fp16")]; tensor layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639296)))]; tensor var_1324_cast_fp16 = mul(x = var_1323_cast_fp16, y = layers_6_self_attn_k_norm_weight_to_fp16)[name = string("op_1324_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1313_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1313_cast_fp16)[name = string("x2_25_cast_fp16")]; tensor var_1342_cast_fp16 = mul(x = x1_25_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1342_cast_fp16")]; tensor var_1343_cast_fp16 = mul(x = x2_25_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1343_cast_fp16")]; tensor var_1344_cast_fp16 = sub(x = var_1342_cast_fp16, y = var_1343_cast_fp16)[name = string("op_1344_cast_fp16")]; tensor var_1345_cast_fp16 = mul(x = x2_25_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1345_cast_fp16")]; tensor var_1346_cast_fp16 = mul(x = x1_25_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1346_cast_fp16")]; tensor var_1347_cast_fp16 = add(x = var_1345_cast_fp16, y = var_1346_cast_fp16)[name = string("op_1347_cast_fp16")]; bool q_13_interleave_0 = const()[name = string("q_13_interleave_0"), val = bool(false)]; tensor q_13_cast_fp16 = concat(axis = var_1259, interleave = q_13_interleave_0, values = (var_1344_cast_fp16, var_1347_cast_fp16))[name = string("q_13_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1324_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1324_cast_fp16)[name = string("x2_27_cast_fp16")]; tensor var_1366_cast_fp16 = mul(x = x1_27_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1366_cast_fp16")]; tensor var_1367_cast_fp16 = mul(x = x2_27_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1367_cast_fp16")]; tensor var_1368_cast_fp16 = sub(x = var_1366_cast_fp16, y = var_1367_cast_fp16)[name = string("op_1368_cast_fp16")]; tensor var_1369_cast_fp16 = mul(x = x2_27_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1369_cast_fp16")]; tensor var_1370_cast_fp16 = mul(x = x1_27_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1370_cast_fp16")]; tensor var_1371_cast_fp16 = add(x = var_1369_cast_fp16, y = var_1370_cast_fp16)[name = string("op_1371_cast_fp16")]; bool k_13_interleave_0 = const()[name = string("k_13_interleave_0"), val = bool(false)]; tensor k_13_cast_fp16 = concat(axis = var_1259, interleave = k_13_interleave_0, values = (var_1368_cast_fp16, var_1371_cast_fp16))[name = string("k_13_cast_fp16")]; tensor read_state_12 = read_state(input = k_cache_6)[name = string("read_state_12")]; tensor k_cache_39_cast_fp16 = mul(x = read_state_12, y = var_264_cast_fp16)[name = string("k_cache_39_cast_fp16")]; write_state(data = k_cache_39_cast_fp16, input = k_cache_6)[name = string("coreml_update_state_136_write_state")]; tensor coreml_update_state_136 = read_state(input = k_cache_6)[name = string("coreml_update_state_136")]; tensor var_1376_cast_fp16 = mul(x = k_13_cast_fp16, y = onehot_cast_fp16)[name = string("op_1376_cast_fp16")]; tensor k_cache_41_cast_fp16 = add(x = coreml_update_state_136, y = var_1376_cast_fp16)[name = string("k_cache_41_cast_fp16")]; write_state(data = k_cache_41_cast_fp16, input = k_cache_6)[name = string("coreml_update_state_137_write_state")]; tensor coreml_update_state_137 = read_state(input = k_cache_6)[name = string("coreml_update_state_137")]; tensor read_state_13 = read_state(input = v_cache_6)[name = string("read_state_13")]; tensor v_cache_39_cast_fp16 = mul(x = read_state_13, y = var_264_cast_fp16)[name = string("v_cache_39_cast_fp16")]; write_state(data = v_cache_39_cast_fp16, input = v_cache_6)[name = string("coreml_update_state_138_write_state")]; tensor coreml_update_state_138 = read_state(input = v_cache_6)[name = string("coreml_update_state_138")]; tensor v_13_cast_fp16 = transpose(perm = v_13_perm_0, x = var_1302_cast_fp16)[name = string("transpose_85")]; tensor var_1380_cast_fp16 = mul(x = v_13_cast_fp16, y = onehot_cast_fp16)[name = string("op_1380_cast_fp16")]; tensor v_cache_41_cast_fp16 = add(x = coreml_update_state_138, y = var_1380_cast_fp16)[name = string("v_cache_41_cast_fp16")]; write_state(data = v_cache_41_cast_fp16, input = v_cache_6)[name = string("coreml_update_state_139_write_state")]; tensor coreml_update_state_139 = read_state(input = v_cache_6)[name = string("coreml_update_state_139")]; tensor var_1382_axes_0 = const()[name = string("op_1382_axes_0"), val = tensor([2])]; tensor var_1382_cast_fp16 = expand_dims(axes = var_1382_axes_0, x = coreml_update_state_137)[name = string("op_1382_cast_fp16")]; tensor k_exp_25_reps_0 = const()[name = string("k_exp_25_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_25_cast_fp16 = tile(reps = k_exp_25_reps_0, x = var_1382_cast_fp16)[name = string("k_exp_25_cast_fp16")]; tensor var_1385 = const()[name = string("op_1385"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_27_cast_fp16 = reshape(shape = var_1385, x = k_exp_25_cast_fp16)[name = string("k_exp_27_cast_fp16")]; tensor var_1387_axes_0 = const()[name = string("op_1387_axes_0"), val = tensor([2])]; tensor var_1387_cast_fp16 = expand_dims(axes = var_1387_axes_0, x = coreml_update_state_139)[name = string("op_1387_cast_fp16")]; tensor v_exp_25_reps_0 = const()[name = string("v_exp_25_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_25_cast_fp16 = tile(reps = v_exp_25_reps_0, x = var_1387_cast_fp16)[name = string("v_exp_25_cast_fp16")]; tensor var_1390 = const()[name = string("op_1390"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_27_cast_fp16 = reshape(shape = var_1390, x = v_exp_25_cast_fp16)[name = string("v_exp_27_cast_fp16")]; bool var_1393_transpose_x_1 = const()[name = string("op_1393_transpose_x_1"), val = bool(false)]; bool var_1393_transpose_y_1 = const()[name = string("op_1393_transpose_y_1"), val = bool(true)]; tensor var_1393_cast_fp16 = matmul(transpose_x = var_1393_transpose_x_1, transpose_y = var_1393_transpose_y_1, x = q_13_cast_fp16, y = k_exp_27_cast_fp16)[name = string("op_1393_cast_fp16")]; fp16 var_1394_to_fp16 = const()[name = string("op_1394_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_25_cast_fp16 = mul(x = var_1393_cast_fp16, y = var_1394_to_fp16)[name = string("attn_25_cast_fp16")]; tensor input_61_cast_fp16 = add(x = attn_25_cast_fp16, y = attention_mask_to_fp16)[name = string("input_61_cast_fp16")]; tensor attn_27_cast_fp16 = softmax(axis = var_1259, x = input_61_cast_fp16)[name = string("attn_27_cast_fp16")]; bool out_13_transpose_x_0 = const()[name = string("out_13_transpose_x_0"), val = bool(false)]; bool out_13_transpose_y_0 = const()[name = string("out_13_transpose_y_0"), val = bool(false)]; tensor out_13_cast_fp16 = matmul(transpose_x = out_13_transpose_x_0, transpose_y = out_13_transpose_y_0, x = attn_27_cast_fp16, y = v_exp_27_cast_fp16)[name = string("out_13_cast_fp16")]; tensor var_1399_perm_0 = const()[name = string("op_1399_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1400 = const()[name = string("op_1400"), val = tensor([1, 1, -1])]; tensor var_1399_cast_fp16 = transpose(perm = var_1399_perm_0, x = out_13_cast_fp16)[name = string("transpose_84")]; tensor input_63_cast_fp16 = reshape(shape = var_1400, x = var_1399_cast_fp16)[name = string("input_63_cast_fp16")]; tensor layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100736832))))[name = string("layers_6_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor x_175_cast_fp16 = add(x = x_155_cast_fp16, y = linear_45_cast_fp16)[name = string("x_175_cast_fp16")]; fp16 var_1258_promoted_3_to_fp16 = const()[name = string("op_1258_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1407_cast_fp16 = pow(x = x_175_cast_fp16, y = var_1258_promoted_3_to_fp16)[name = string("op_1407_cast_fp16")]; tensor var_1409_axes_0 = const()[name = string("op_1409_axes_0"), val = tensor([-1])]; bool var_1409_keep_dims_0 = const()[name = string("op_1409_keep_dims_0"), val = bool(true)]; tensor var_1409_cast_fp16 = reduce_mean(axes = var_1409_axes_0, keep_dims = var_1409_keep_dims_0, x = var_1407_cast_fp16)[name = string("op_1409_cast_fp16")]; fp16 var_1410_to_fp16 = const()[name = string("op_1410_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1411_cast_fp16 = add(x = var_1409_cast_fp16, y = var_1410_to_fp16)[name = string("op_1411_cast_fp16")]; fp32 norm_55_epsilon_0 = const()[name = string("norm_55_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_55_cast_fp16 = rsqrt(epsilon = norm_55_epsilon_0, x = var_1411_cast_fp16)[name = string("norm_55_cast_fp16")]; tensor var_1413_cast_fp16 = mul(x = x_175_cast_fp16, y = norm_55_cast_fp16)[name = string("op_1413_cast_fp16")]; tensor layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100737408)))]; tensor var_1414_cast_fp16 = mul(x = var_1413_cast_fp16, y = layers_6_post_attention_layernorm_weight_to_fp16)[name = string("op_1414_cast_fp16")]; tensor layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100739520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885312))))[name = string("layers_6_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = var_1414_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_1424_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1424_cast_fp16")]; tensor layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107031680))))[name = string("layers_6_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_up_proj_weight_to_fp16_palettized, x = var_1414_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_69_cast_fp16 = mul(x = var_1424_cast_fp16, y = linear_47_cast_fp16)[name = string("input_69_cast_fp16")]; tensor layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107032256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178048))))[name = string("layers_6_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_48_cast_fp16)[name = string("x_181_cast_fp16")]; int32 var_1444 = const()[name = string("op_1444"), val = int32(-1)]; fp16 var_1443_promoted_to_fp16 = const()[name = string("op_1443_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1453_cast_fp16 = pow(x = x_181_cast_fp16, y = var_1443_promoted_to_fp16)[name = string("op_1453_cast_fp16")]; tensor var_1455_axes_0 = const()[name = string("op_1455_axes_0"), val = tensor([-1])]; bool var_1455_keep_dims_0 = const()[name = string("op_1455_keep_dims_0"), val = bool(true)]; tensor var_1455_cast_fp16 = reduce_mean(axes = var_1455_axes_0, keep_dims = var_1455_keep_dims_0, x = var_1453_cast_fp16)[name = string("op_1455_cast_fp16")]; fp16 var_1456_to_fp16 = const()[name = string("op_1456_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1457_cast_fp16 = add(x = var_1455_cast_fp16, y = var_1456_to_fp16)[name = string("op_1457_cast_fp16")]; fp32 norm_57_epsilon_0 = const()[name = string("norm_57_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_57_cast_fp16 = rsqrt(epsilon = norm_57_epsilon_0, x = var_1457_cast_fp16)[name = string("norm_57_cast_fp16")]; tensor var_1459_cast_fp16 = mul(x = x_181_cast_fp16, y = norm_57_cast_fp16)[name = string("op_1459_cast_fp16")]; tensor layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178624)))]; tensor var_1460_cast_fp16 = mul(x = var_1459_cast_fp16, y = layers_7_input_layernorm_weight_to_fp16)[name = string("op_1460_cast_fp16")]; tensor layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112277952))))[name = string("layers_7_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = var_1460_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor var_1476 = const()[name = string("op_1476"), val = tensor([1, 1, 16, 128])]; tensor var_1477_cast_fp16 = reshape(shape = var_1476, x = linear_49_cast_fp16)[name = string("op_1477_cast_fp16")]; tensor x_187_perm_0 = const()[name = string("x_187_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112278528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327168))))[name = string("layers_7_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = var_1460_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor var_1481 = const()[name = string("op_1481"), val = tensor([1, 1, 8, 128])]; tensor var_1482_cast_fp16 = reshape(shape = var_1481, x = linear_50_cast_fp16)[name = string("op_1482_cast_fp16")]; tensor x_191_perm_0 = const()[name = string("x_191_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376384))))[name = string("layers_7_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = var_1460_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor var_1486 = const()[name = string("op_1486"), val = tensor([1, 1, 8, 128])]; tensor var_1487_cast_fp16 = reshape(shape = var_1486, x = linear_51_cast_fp16)[name = string("op_1487_cast_fp16")]; tensor v_15_perm_0 = const()[name = string("v_15_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_1443_promoted_1_to_fp16 = const()[name = string("op_1443_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_187_cast_fp16 = transpose(perm = x_187_perm_0, x = var_1477_cast_fp16)[name = string("transpose_83")]; tensor var_1491_cast_fp16 = pow(x = x_187_cast_fp16, y = var_1443_promoted_1_to_fp16)[name = string("op_1491_cast_fp16")]; tensor var_1493_axes_0 = const()[name = string("op_1493_axes_0"), val = tensor([-1])]; bool var_1493_keep_dims_0 = const()[name = string("op_1493_keep_dims_0"), val = bool(true)]; tensor var_1493_cast_fp16 = reduce_mean(axes = var_1493_axes_0, keep_dims = var_1493_keep_dims_0, x = var_1491_cast_fp16)[name = string("op_1493_cast_fp16")]; fp16 var_1494_to_fp16 = const()[name = string("op_1494_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1495_cast_fp16 = add(x = var_1493_cast_fp16, y = var_1494_to_fp16)[name = string("op_1495_cast_fp16")]; fp32 norm_59_epsilon_0 = const()[name = string("norm_59_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_59_cast_fp16 = rsqrt(epsilon = norm_59_epsilon_0, x = var_1495_cast_fp16)[name = string("norm_59_cast_fp16")]; tensor var_1497_cast_fp16 = mul(x = x_187_cast_fp16, y = norm_59_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376960)))]; tensor var_1498_cast_fp16 = mul(x = var_1497_cast_fp16, y = layers_7_self_attn_q_norm_weight_to_fp16)[name = string("op_1498_cast_fp16")]; fp16 var_1443_promoted_2_to_fp16 = const()[name = string("op_1443_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_191_cast_fp16 = transpose(perm = x_191_perm_0, x = var_1482_cast_fp16)[name = string("transpose_82")]; tensor var_1502_cast_fp16 = pow(x = x_191_cast_fp16, y = var_1443_promoted_2_to_fp16)[name = string("op_1502_cast_fp16")]; tensor var_1504_axes_0 = const()[name = string("op_1504_axes_0"), val = tensor([-1])]; bool var_1504_keep_dims_0 = const()[name = string("op_1504_keep_dims_0"), val = bool(true)]; tensor var_1504_cast_fp16 = reduce_mean(axes = var_1504_axes_0, keep_dims = var_1504_keep_dims_0, x = var_1502_cast_fp16)[name = string("op_1504_cast_fp16")]; fp16 var_1505_to_fp16 = const()[name = string("op_1505_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1506_cast_fp16 = add(x = var_1504_cast_fp16, y = var_1505_to_fp16)[name = string("op_1506_cast_fp16")]; fp32 norm_61_epsilon_0 = const()[name = string("norm_61_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_61_cast_fp16 = rsqrt(epsilon = norm_61_epsilon_0, x = var_1506_cast_fp16)[name = string("norm_61_cast_fp16")]; tensor var_1508_cast_fp16 = mul(x = x_191_cast_fp16, y = norm_61_cast_fp16)[name = string("op_1508_cast_fp16")]; tensor layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377280)))]; tensor var_1509_cast_fp16 = mul(x = var_1508_cast_fp16, y = layers_7_self_attn_k_norm_weight_to_fp16)[name = string("op_1509_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1498_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1498_cast_fp16)[name = string("x2_29_cast_fp16")]; tensor var_1527_cast_fp16 = mul(x = x1_29_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1527_cast_fp16")]; tensor var_1528_cast_fp16 = mul(x = x2_29_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1528_cast_fp16")]; tensor var_1529_cast_fp16 = sub(x = var_1527_cast_fp16, y = var_1528_cast_fp16)[name = string("op_1529_cast_fp16")]; tensor var_1530_cast_fp16 = mul(x = x2_29_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1530_cast_fp16")]; tensor var_1531_cast_fp16 = mul(x = x1_29_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1531_cast_fp16")]; tensor var_1532_cast_fp16 = add(x = var_1530_cast_fp16, y = var_1531_cast_fp16)[name = string("op_1532_cast_fp16")]; bool q_15_interleave_0 = const()[name = string("q_15_interleave_0"), val = bool(false)]; tensor q_15_cast_fp16 = concat(axis = var_1444, interleave = q_15_interleave_0, values = (var_1529_cast_fp16, var_1532_cast_fp16))[name = string("q_15_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1509_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1509_cast_fp16)[name = string("x2_31_cast_fp16")]; tensor var_1551_cast_fp16 = mul(x = x1_31_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1551_cast_fp16")]; tensor var_1552_cast_fp16 = mul(x = x2_31_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1552_cast_fp16")]; tensor var_1553_cast_fp16 = sub(x = var_1551_cast_fp16, y = var_1552_cast_fp16)[name = string("op_1553_cast_fp16")]; tensor var_1554_cast_fp16 = mul(x = x2_31_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1554_cast_fp16")]; tensor var_1555_cast_fp16 = mul(x = x1_31_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1555_cast_fp16")]; tensor var_1556_cast_fp16 = add(x = var_1554_cast_fp16, y = var_1555_cast_fp16)[name = string("op_1556_cast_fp16")]; bool k_15_interleave_0 = const()[name = string("k_15_interleave_0"), val = bool(false)]; tensor k_15_cast_fp16 = concat(axis = var_1444, interleave = k_15_interleave_0, values = (var_1553_cast_fp16, var_1556_cast_fp16))[name = string("k_15_cast_fp16")]; tensor read_state_14 = read_state(input = k_cache_7)[name = string("read_state_14")]; tensor k_cache_45_cast_fp16 = mul(x = read_state_14, y = var_264_cast_fp16)[name = string("k_cache_45_cast_fp16")]; write_state(data = k_cache_45_cast_fp16, input = k_cache_7)[name = string("coreml_update_state_140_write_state")]; tensor coreml_update_state_140 = read_state(input = k_cache_7)[name = string("coreml_update_state_140")]; tensor var_1561_cast_fp16 = mul(x = k_15_cast_fp16, y = onehot_cast_fp16)[name = string("op_1561_cast_fp16")]; tensor k_cache_47_cast_fp16 = add(x = coreml_update_state_140, y = var_1561_cast_fp16)[name = string("k_cache_47_cast_fp16")]; write_state(data = k_cache_47_cast_fp16, input = k_cache_7)[name = string("coreml_update_state_141_write_state")]; tensor coreml_update_state_141 = read_state(input = k_cache_7)[name = string("coreml_update_state_141")]; tensor read_state_15 = read_state(input = v_cache_7)[name = string("read_state_15")]; tensor v_cache_45_cast_fp16 = mul(x = read_state_15, y = var_264_cast_fp16)[name = string("v_cache_45_cast_fp16")]; write_state(data = v_cache_45_cast_fp16, input = v_cache_7)[name = string("coreml_update_state_142_write_state")]; tensor coreml_update_state_142 = read_state(input = v_cache_7)[name = string("coreml_update_state_142")]; tensor v_15_cast_fp16 = transpose(perm = v_15_perm_0, x = var_1487_cast_fp16)[name = string("transpose_81")]; tensor var_1565_cast_fp16 = mul(x = v_15_cast_fp16, y = onehot_cast_fp16)[name = string("op_1565_cast_fp16")]; tensor v_cache_47_cast_fp16 = add(x = coreml_update_state_142, y = var_1565_cast_fp16)[name = string("v_cache_47_cast_fp16")]; write_state(data = v_cache_47_cast_fp16, input = v_cache_7)[name = string("coreml_update_state_143_write_state")]; tensor coreml_update_state_143 = read_state(input = v_cache_7)[name = string("coreml_update_state_143")]; tensor var_1567_axes_0 = const()[name = string("op_1567_axes_0"), val = tensor([2])]; tensor var_1567_cast_fp16 = expand_dims(axes = var_1567_axes_0, x = coreml_update_state_141)[name = string("op_1567_cast_fp16")]; tensor k_exp_29_reps_0 = const()[name = string("k_exp_29_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_29_cast_fp16 = tile(reps = k_exp_29_reps_0, x = var_1567_cast_fp16)[name = string("k_exp_29_cast_fp16")]; tensor var_1570 = const()[name = string("op_1570"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_31_cast_fp16 = reshape(shape = var_1570, x = k_exp_29_cast_fp16)[name = string("k_exp_31_cast_fp16")]; tensor var_1572_axes_0 = const()[name = string("op_1572_axes_0"), val = tensor([2])]; tensor var_1572_cast_fp16 = expand_dims(axes = var_1572_axes_0, x = coreml_update_state_143)[name = string("op_1572_cast_fp16")]; tensor v_exp_29_reps_0 = const()[name = string("v_exp_29_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_29_cast_fp16 = tile(reps = v_exp_29_reps_0, x = var_1572_cast_fp16)[name = string("v_exp_29_cast_fp16")]; tensor var_1575 = const()[name = string("op_1575"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_31_cast_fp16 = reshape(shape = var_1575, x = v_exp_29_cast_fp16)[name = string("v_exp_31_cast_fp16")]; bool var_1578_transpose_x_1 = const()[name = string("op_1578_transpose_x_1"), val = bool(false)]; bool var_1578_transpose_y_1 = const()[name = string("op_1578_transpose_y_1"), val = bool(true)]; tensor var_1578_cast_fp16 = matmul(transpose_x = var_1578_transpose_x_1, transpose_y = var_1578_transpose_y_1, x = q_15_cast_fp16, y = k_exp_31_cast_fp16)[name = string("op_1578_cast_fp16")]; fp16 var_1579_to_fp16 = const()[name = string("op_1579_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_29_cast_fp16 = mul(x = var_1578_cast_fp16, y = var_1579_to_fp16)[name = string("attn_29_cast_fp16")]; tensor input_71_cast_fp16 = add(x = attn_29_cast_fp16, y = attention_mask_to_fp16)[name = string("input_71_cast_fp16")]; tensor attn_31_cast_fp16 = softmax(axis = var_1444, x = input_71_cast_fp16)[name = string("attn_31_cast_fp16")]; bool out_15_transpose_x_0 = const()[name = string("out_15_transpose_x_0"), val = bool(false)]; bool out_15_transpose_y_0 = const()[name = string("out_15_transpose_y_0"), val = bool(false)]; tensor out_15_cast_fp16 = matmul(transpose_x = out_15_transpose_x_0, transpose_y = out_15_transpose_y_0, x = attn_31_cast_fp16, y = v_exp_31_cast_fp16)[name = string("out_15_cast_fp16")]; tensor var_1584_perm_0 = const()[name = string("op_1584_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1585 = const()[name = string("op_1585"), val = tensor([1, 1, -1])]; tensor var_1584_cast_fp16 = transpose(perm = var_1584_perm_0, x = out_15_cast_fp16)[name = string("transpose_80")]; tensor input_73_cast_fp16 = reshape(shape = var_1585, x = var_1584_cast_fp16)[name = string("input_73_cast_fp16")]; tensor layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116474816))))[name = string("layers_7_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor x_201_cast_fp16 = add(x = x_181_cast_fp16, y = linear_52_cast_fp16)[name = string("x_201_cast_fp16")]; fp16 var_1443_promoted_3_to_fp16 = const()[name = string("op_1443_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1592_cast_fp16 = pow(x = x_201_cast_fp16, y = var_1443_promoted_3_to_fp16)[name = string("op_1592_cast_fp16")]; tensor var_1594_axes_0 = const()[name = string("op_1594_axes_0"), val = tensor([-1])]; bool var_1594_keep_dims_0 = const()[name = string("op_1594_keep_dims_0"), val = bool(true)]; tensor var_1594_cast_fp16 = reduce_mean(axes = var_1594_axes_0, keep_dims = var_1594_keep_dims_0, x = var_1592_cast_fp16)[name = string("op_1594_cast_fp16")]; fp16 var_1595_to_fp16 = const()[name = string("op_1595_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1596_cast_fp16 = add(x = var_1594_cast_fp16, y = var_1595_to_fp16)[name = string("op_1596_cast_fp16")]; fp32 norm_63_epsilon_0 = const()[name = string("norm_63_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_63_cast_fp16 = rsqrt(epsilon = norm_63_epsilon_0, x = var_1596_cast_fp16)[name = string("norm_63_cast_fp16")]; tensor var_1598_cast_fp16 = mul(x = x_201_cast_fp16, y = norm_63_cast_fp16)[name = string("op_1598_cast_fp16")]; tensor layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116475392)))]; tensor var_1599_cast_fp16 = mul(x = var_1598_cast_fp16, y = layers_7_post_attention_layernorm_weight_to_fp16)[name = string("op_1599_cast_fp16")]; tensor layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116477504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623296))))[name = string("layers_7_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = var_1599_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_1609_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1609_cast_fp16")]; tensor layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122769664))))[name = string("layers_7_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_up_proj_weight_to_fp16_palettized, x = var_1599_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_79_cast_fp16 = mul(x = var_1609_cast_fp16, y = linear_54_cast_fp16)[name = string("input_79_cast_fp16")]; tensor layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122770240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916032))))[name = string("layers_7_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_55_cast_fp16)[name = string("x_207_cast_fp16")]; int32 var_1629 = const()[name = string("op_1629"), val = int32(-1)]; fp16 var_1628_promoted_to_fp16 = const()[name = string("op_1628_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1638_cast_fp16 = pow(x = x_207_cast_fp16, y = var_1628_promoted_to_fp16)[name = string("op_1638_cast_fp16")]; tensor var_1640_axes_0 = const()[name = string("op_1640_axes_0"), val = tensor([-1])]; bool var_1640_keep_dims_0 = const()[name = string("op_1640_keep_dims_0"), val = bool(true)]; tensor var_1640_cast_fp16 = reduce_mean(axes = var_1640_axes_0, keep_dims = var_1640_keep_dims_0, x = var_1638_cast_fp16)[name = string("op_1640_cast_fp16")]; fp16 var_1641_to_fp16 = const()[name = string("op_1641_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1642_cast_fp16 = add(x = var_1640_cast_fp16, y = var_1641_to_fp16)[name = string("op_1642_cast_fp16")]; fp32 norm_65_epsilon_0 = const()[name = string("norm_65_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_65_cast_fp16 = rsqrt(epsilon = norm_65_epsilon_0, x = var_1642_cast_fp16)[name = string("norm_65_cast_fp16")]; tensor var_1644_cast_fp16 = mul(x = x_207_cast_fp16, y = norm_65_cast_fp16)[name = string("op_1644_cast_fp16")]; tensor layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916608)))]; tensor var_1645_cast_fp16 = mul(x = var_1644_cast_fp16, y = layers_8_input_layernorm_weight_to_fp16)[name = string("op_1645_cast_fp16")]; tensor layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125918720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128015936))))[name = string("layers_8_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = var_1645_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor var_1661 = const()[name = string("op_1661"), val = tensor([1, 1, 16, 128])]; tensor var_1662_cast_fp16 = reshape(shape = var_1661, x = linear_56_cast_fp16)[name = string("op_1662_cast_fp16")]; tensor x_213_perm_0 = const()[name = string("x_213_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128016512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065152))))[name = string("layers_8_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = var_1645_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor var_1666 = const()[name = string("op_1666"), val = tensor([1, 1, 8, 128])]; tensor var_1667_cast_fp16 = reshape(shape = var_1666, x = linear_57_cast_fp16)[name = string("op_1667_cast_fp16")]; tensor x_217_perm_0 = const()[name = string("x_217_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114368))))[name = string("layers_8_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = var_1645_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor var_1671 = const()[name = string("op_1671"), val = tensor([1, 1, 8, 128])]; tensor var_1672_cast_fp16 = reshape(shape = var_1671, x = linear_58_cast_fp16)[name = string("op_1672_cast_fp16")]; tensor v_17_perm_0 = const()[name = string("v_17_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_1628_promoted_1_to_fp16 = const()[name = string("op_1628_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_213_cast_fp16 = transpose(perm = x_213_perm_0, x = var_1662_cast_fp16)[name = string("transpose_79")]; tensor var_1676_cast_fp16 = pow(x = x_213_cast_fp16, y = var_1628_promoted_1_to_fp16)[name = string("op_1676_cast_fp16")]; tensor var_1678_axes_0 = const()[name = string("op_1678_axes_0"), val = tensor([-1])]; bool var_1678_keep_dims_0 = const()[name = string("op_1678_keep_dims_0"), val = bool(true)]; tensor var_1678_cast_fp16 = reduce_mean(axes = var_1678_axes_0, keep_dims = var_1678_keep_dims_0, x = var_1676_cast_fp16)[name = string("op_1678_cast_fp16")]; fp16 var_1679_to_fp16 = const()[name = string("op_1679_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1680_cast_fp16 = add(x = var_1678_cast_fp16, y = var_1679_to_fp16)[name = string("op_1680_cast_fp16")]; fp32 norm_67_epsilon_0 = const()[name = string("norm_67_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_67_cast_fp16 = rsqrt(epsilon = norm_67_epsilon_0, x = var_1680_cast_fp16)[name = string("norm_67_cast_fp16")]; tensor var_1682_cast_fp16 = mul(x = x_213_cast_fp16, y = norm_67_cast_fp16)[name = string("op_1682_cast_fp16")]; tensor layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114944)))]; tensor var_1683_cast_fp16 = mul(x = var_1682_cast_fp16, y = layers_8_self_attn_q_norm_weight_to_fp16)[name = string("op_1683_cast_fp16")]; fp16 var_1628_promoted_2_to_fp16 = const()[name = string("op_1628_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_217_cast_fp16 = transpose(perm = x_217_perm_0, x = var_1667_cast_fp16)[name = string("transpose_78")]; tensor var_1687_cast_fp16 = pow(x = x_217_cast_fp16, y = var_1628_promoted_2_to_fp16)[name = string("op_1687_cast_fp16")]; tensor var_1689_axes_0 = const()[name = string("op_1689_axes_0"), val = tensor([-1])]; bool var_1689_keep_dims_0 = const()[name = string("op_1689_keep_dims_0"), val = bool(true)]; tensor var_1689_cast_fp16 = reduce_mean(axes = var_1689_axes_0, keep_dims = var_1689_keep_dims_0, x = var_1687_cast_fp16)[name = string("op_1689_cast_fp16")]; fp16 var_1690_to_fp16 = const()[name = string("op_1690_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1691_cast_fp16 = add(x = var_1689_cast_fp16, y = var_1690_to_fp16)[name = string("op_1691_cast_fp16")]; fp32 norm_69_epsilon_0 = const()[name = string("norm_69_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_69_cast_fp16 = rsqrt(epsilon = norm_69_epsilon_0, x = var_1691_cast_fp16)[name = string("norm_69_cast_fp16")]; tensor var_1693_cast_fp16 = mul(x = x_217_cast_fp16, y = norm_69_cast_fp16)[name = string("op_1693_cast_fp16")]; tensor layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115264)))]; tensor var_1694_cast_fp16 = mul(x = var_1693_cast_fp16, y = layers_8_self_attn_k_norm_weight_to_fp16)[name = string("op_1694_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1683_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1683_cast_fp16)[name = string("x2_33_cast_fp16")]; tensor var_1712_cast_fp16 = mul(x = x1_33_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1712_cast_fp16")]; tensor var_1713_cast_fp16 = mul(x = x2_33_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1713_cast_fp16")]; tensor var_1714_cast_fp16 = sub(x = var_1712_cast_fp16, y = var_1713_cast_fp16)[name = string("op_1714_cast_fp16")]; tensor var_1715_cast_fp16 = mul(x = x2_33_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1715_cast_fp16")]; tensor var_1716_cast_fp16 = mul(x = x1_33_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1716_cast_fp16")]; tensor var_1717_cast_fp16 = add(x = var_1715_cast_fp16, y = var_1716_cast_fp16)[name = string("op_1717_cast_fp16")]; bool q_17_interleave_0 = const()[name = string("q_17_interleave_0"), val = bool(false)]; tensor q_17_cast_fp16 = concat(axis = var_1629, interleave = q_17_interleave_0, values = (var_1714_cast_fp16, var_1717_cast_fp16))[name = string("q_17_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_1694_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_1694_cast_fp16)[name = string("x2_35_cast_fp16")]; tensor var_1736_cast_fp16 = mul(x = x1_35_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1736_cast_fp16")]; tensor var_1737_cast_fp16 = mul(x = x2_35_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1737_cast_fp16")]; tensor var_1738_cast_fp16 = sub(x = var_1736_cast_fp16, y = var_1737_cast_fp16)[name = string("op_1738_cast_fp16")]; tensor var_1739_cast_fp16 = mul(x = x2_35_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1739_cast_fp16")]; tensor var_1740_cast_fp16 = mul(x = x1_35_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1740_cast_fp16")]; tensor var_1741_cast_fp16 = add(x = var_1739_cast_fp16, y = var_1740_cast_fp16)[name = string("op_1741_cast_fp16")]; bool k_17_interleave_0 = const()[name = string("k_17_interleave_0"), val = bool(false)]; tensor k_17_cast_fp16 = concat(axis = var_1629, interleave = k_17_interleave_0, values = (var_1738_cast_fp16, var_1741_cast_fp16))[name = string("k_17_cast_fp16")]; tensor read_state_16 = read_state(input = k_cache_8)[name = string("read_state_16")]; tensor k_cache_51_cast_fp16 = mul(x = read_state_16, y = var_264_cast_fp16)[name = string("k_cache_51_cast_fp16")]; write_state(data = k_cache_51_cast_fp16, input = k_cache_8)[name = string("coreml_update_state_144_write_state")]; tensor coreml_update_state_144 = read_state(input = k_cache_8)[name = string("coreml_update_state_144")]; tensor var_1746_cast_fp16 = mul(x = k_17_cast_fp16, y = onehot_cast_fp16)[name = string("op_1746_cast_fp16")]; tensor k_cache_53_cast_fp16 = add(x = coreml_update_state_144, y = var_1746_cast_fp16)[name = string("k_cache_53_cast_fp16")]; write_state(data = k_cache_53_cast_fp16, input = k_cache_8)[name = string("coreml_update_state_145_write_state")]; tensor coreml_update_state_145 = read_state(input = k_cache_8)[name = string("coreml_update_state_145")]; tensor read_state_17 = read_state(input = v_cache_8)[name = string("read_state_17")]; tensor v_cache_51_cast_fp16 = mul(x = read_state_17, y = var_264_cast_fp16)[name = string("v_cache_51_cast_fp16")]; write_state(data = v_cache_51_cast_fp16, input = v_cache_8)[name = string("coreml_update_state_146_write_state")]; tensor coreml_update_state_146 = read_state(input = v_cache_8)[name = string("coreml_update_state_146")]; tensor v_17_cast_fp16 = transpose(perm = v_17_perm_0, x = var_1672_cast_fp16)[name = string("transpose_77")]; tensor var_1750_cast_fp16 = mul(x = v_17_cast_fp16, y = onehot_cast_fp16)[name = string("op_1750_cast_fp16")]; tensor v_cache_53_cast_fp16 = add(x = coreml_update_state_146, y = var_1750_cast_fp16)[name = string("v_cache_53_cast_fp16")]; write_state(data = v_cache_53_cast_fp16, input = v_cache_8)[name = string("coreml_update_state_147_write_state")]; tensor coreml_update_state_147 = read_state(input = v_cache_8)[name = string("coreml_update_state_147")]; tensor var_1752_axes_0 = const()[name = string("op_1752_axes_0"), val = tensor([2])]; tensor var_1752_cast_fp16 = expand_dims(axes = var_1752_axes_0, x = coreml_update_state_145)[name = string("op_1752_cast_fp16")]; tensor k_exp_33_reps_0 = const()[name = string("k_exp_33_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_33_cast_fp16 = tile(reps = k_exp_33_reps_0, x = var_1752_cast_fp16)[name = string("k_exp_33_cast_fp16")]; tensor var_1755 = const()[name = string("op_1755"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_35_cast_fp16 = reshape(shape = var_1755, x = k_exp_33_cast_fp16)[name = string("k_exp_35_cast_fp16")]; tensor var_1757_axes_0 = const()[name = string("op_1757_axes_0"), val = tensor([2])]; tensor var_1757_cast_fp16 = expand_dims(axes = var_1757_axes_0, x = coreml_update_state_147)[name = string("op_1757_cast_fp16")]; tensor v_exp_33_reps_0 = const()[name = string("v_exp_33_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_33_cast_fp16 = tile(reps = v_exp_33_reps_0, x = var_1757_cast_fp16)[name = string("v_exp_33_cast_fp16")]; tensor var_1760 = const()[name = string("op_1760"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_35_cast_fp16 = reshape(shape = var_1760, x = v_exp_33_cast_fp16)[name = string("v_exp_35_cast_fp16")]; bool var_1763_transpose_x_1 = const()[name = string("op_1763_transpose_x_1"), val = bool(false)]; bool var_1763_transpose_y_1 = const()[name = string("op_1763_transpose_y_1"), val = bool(true)]; tensor var_1763_cast_fp16 = matmul(transpose_x = var_1763_transpose_x_1, transpose_y = var_1763_transpose_y_1, x = q_17_cast_fp16, y = k_exp_35_cast_fp16)[name = string("op_1763_cast_fp16")]; fp16 var_1764_to_fp16 = const()[name = string("op_1764_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_33_cast_fp16 = mul(x = var_1763_cast_fp16, y = var_1764_to_fp16)[name = string("attn_33_cast_fp16")]; tensor input_81_cast_fp16 = add(x = attn_33_cast_fp16, y = attention_mask_to_fp16)[name = string("input_81_cast_fp16")]; tensor attn_35_cast_fp16 = softmax(axis = var_1629, x = input_81_cast_fp16)[name = string("attn_35_cast_fp16")]; bool out_17_transpose_x_0 = const()[name = string("out_17_transpose_x_0"), val = bool(false)]; bool out_17_transpose_y_0 = const()[name = string("out_17_transpose_y_0"), val = bool(false)]; tensor out_17_cast_fp16 = matmul(transpose_x = out_17_transpose_x_0, transpose_y = out_17_transpose_y_0, x = attn_35_cast_fp16, y = v_exp_35_cast_fp16)[name = string("out_17_cast_fp16")]; tensor var_1769_perm_0 = const()[name = string("op_1769_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1770 = const()[name = string("op_1770"), val = tensor([1, 1, -1])]; tensor var_1769_cast_fp16 = transpose(perm = var_1769_perm_0, x = out_17_cast_fp16)[name = string("transpose_76")]; tensor input_83_cast_fp16 = reshape(shape = var_1770, x = var_1769_cast_fp16)[name = string("input_83_cast_fp16")]; tensor layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132212800))))[name = string("layers_8_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor x_227_cast_fp16 = add(x = x_207_cast_fp16, y = linear_59_cast_fp16)[name = string("x_227_cast_fp16")]; fp16 var_1628_promoted_3_to_fp16 = const()[name = string("op_1628_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1777_cast_fp16 = pow(x = x_227_cast_fp16, y = var_1628_promoted_3_to_fp16)[name = string("op_1777_cast_fp16")]; tensor var_1779_axes_0 = const()[name = string("op_1779_axes_0"), val = tensor([-1])]; bool var_1779_keep_dims_0 = const()[name = string("op_1779_keep_dims_0"), val = bool(true)]; tensor var_1779_cast_fp16 = reduce_mean(axes = var_1779_axes_0, keep_dims = var_1779_keep_dims_0, x = var_1777_cast_fp16)[name = string("op_1779_cast_fp16")]; fp16 var_1780_to_fp16 = const()[name = string("op_1780_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1781_cast_fp16 = add(x = var_1779_cast_fp16, y = var_1780_to_fp16)[name = string("op_1781_cast_fp16")]; fp32 norm_71_epsilon_0 = const()[name = string("norm_71_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_71_cast_fp16 = rsqrt(epsilon = norm_71_epsilon_0, x = var_1781_cast_fp16)[name = string("norm_71_cast_fp16")]; tensor var_1783_cast_fp16 = mul(x = x_227_cast_fp16, y = norm_71_cast_fp16)[name = string("op_1783_cast_fp16")]; tensor layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132213376)))]; tensor var_1784_cast_fp16 = mul(x = var_1783_cast_fp16, y = layers_8_post_attention_layernorm_weight_to_fp16)[name = string("op_1784_cast_fp16")]; tensor layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132215488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361280))))[name = string("layers_8_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = var_1784_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_1794_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1794_cast_fp16")]; tensor layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138507648))))[name = string("layers_8_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_up_proj_weight_to_fp16_palettized, x = var_1784_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_89_cast_fp16 = mul(x = var_1794_cast_fp16, y = linear_61_cast_fp16)[name = string("input_89_cast_fp16")]; tensor layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138508224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654016))))[name = string("layers_8_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_227_cast_fp16, y = linear_62_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_1814 = const()[name = string("op_1814"), val = int32(-1)]; fp16 var_1813_promoted_to_fp16 = const()[name = string("op_1813_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1823_cast_fp16 = pow(x = x_233_cast_fp16, y = var_1813_promoted_to_fp16)[name = string("op_1823_cast_fp16")]; tensor var_1825_axes_0 = const()[name = string("op_1825_axes_0"), val = tensor([-1])]; bool var_1825_keep_dims_0 = const()[name = string("op_1825_keep_dims_0"), val = bool(true)]; tensor var_1825_cast_fp16 = reduce_mean(axes = var_1825_axes_0, keep_dims = var_1825_keep_dims_0, x = var_1823_cast_fp16)[name = string("op_1825_cast_fp16")]; fp16 var_1826_to_fp16 = const()[name = string("op_1826_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1827_cast_fp16 = add(x = var_1825_cast_fp16, y = var_1826_to_fp16)[name = string("op_1827_cast_fp16")]; fp32 norm_73_epsilon_0 = const()[name = string("norm_73_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_73_cast_fp16 = rsqrt(epsilon = norm_73_epsilon_0, x = var_1827_cast_fp16)[name = string("norm_73_cast_fp16")]; tensor var_1829_cast_fp16 = mul(x = x_233_cast_fp16, y = norm_73_cast_fp16)[name = string("op_1829_cast_fp16")]; tensor layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654592)))]; tensor var_1830_cast_fp16 = mul(x = var_1829_cast_fp16, y = layers_9_input_layernorm_weight_to_fp16)[name = string("op_1830_cast_fp16")]; tensor layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141656704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143753920))))[name = string("layers_9_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = var_1830_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor var_1846 = const()[name = string("op_1846"), val = tensor([1, 1, 16, 128])]; tensor var_1847_cast_fp16 = reshape(shape = var_1846, x = linear_63_cast_fp16)[name = string("op_1847_cast_fp16")]; tensor x_239_perm_0 = const()[name = string("x_239_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143754496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803136))))[name = string("layers_9_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = var_1830_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor var_1851 = const()[name = string("op_1851"), val = tensor([1, 1, 8, 128])]; tensor var_1852_cast_fp16 = reshape(shape = var_1851, x = linear_64_cast_fp16)[name = string("op_1852_cast_fp16")]; tensor x_243_perm_0 = const()[name = string("x_243_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852352))))[name = string("layers_9_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = var_1830_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor var_1856 = const()[name = string("op_1856"), val = tensor([1, 1, 8, 128])]; tensor var_1857_cast_fp16 = reshape(shape = var_1856, x = linear_65_cast_fp16)[name = string("op_1857_cast_fp16")]; tensor v_19_perm_0 = const()[name = string("v_19_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_1813_promoted_1_to_fp16 = const()[name = string("op_1813_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_239_cast_fp16 = transpose(perm = x_239_perm_0, x = var_1847_cast_fp16)[name = string("transpose_75")]; tensor var_1861_cast_fp16 = pow(x = x_239_cast_fp16, y = var_1813_promoted_1_to_fp16)[name = string("op_1861_cast_fp16")]; tensor var_1863_axes_0 = const()[name = string("op_1863_axes_0"), val = tensor([-1])]; bool var_1863_keep_dims_0 = const()[name = string("op_1863_keep_dims_0"), val = bool(true)]; tensor var_1863_cast_fp16 = reduce_mean(axes = var_1863_axes_0, keep_dims = var_1863_keep_dims_0, x = var_1861_cast_fp16)[name = string("op_1863_cast_fp16")]; fp16 var_1864_to_fp16 = const()[name = string("op_1864_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1865_cast_fp16 = add(x = var_1863_cast_fp16, y = var_1864_to_fp16)[name = string("op_1865_cast_fp16")]; fp32 norm_75_epsilon_0 = const()[name = string("norm_75_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_75_cast_fp16 = rsqrt(epsilon = norm_75_epsilon_0, x = var_1865_cast_fp16)[name = string("norm_75_cast_fp16")]; tensor var_1867_cast_fp16 = mul(x = x_239_cast_fp16, y = norm_75_cast_fp16)[name = string("op_1867_cast_fp16")]; tensor layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852928)))]; tensor var_1868_cast_fp16 = mul(x = var_1867_cast_fp16, y = layers_9_self_attn_q_norm_weight_to_fp16)[name = string("op_1868_cast_fp16")]; fp16 var_1813_promoted_2_to_fp16 = const()[name = string("op_1813_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_243_cast_fp16 = transpose(perm = x_243_perm_0, x = var_1852_cast_fp16)[name = string("transpose_74")]; tensor var_1872_cast_fp16 = pow(x = x_243_cast_fp16, y = var_1813_promoted_2_to_fp16)[name = string("op_1872_cast_fp16")]; tensor var_1874_axes_0 = const()[name = string("op_1874_axes_0"), val = tensor([-1])]; bool var_1874_keep_dims_0 = const()[name = string("op_1874_keep_dims_0"), val = bool(true)]; tensor var_1874_cast_fp16 = reduce_mean(axes = var_1874_axes_0, keep_dims = var_1874_keep_dims_0, x = var_1872_cast_fp16)[name = string("op_1874_cast_fp16")]; fp16 var_1875_to_fp16 = const()[name = string("op_1875_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1876_cast_fp16 = add(x = var_1874_cast_fp16, y = var_1875_to_fp16)[name = string("op_1876_cast_fp16")]; fp32 norm_77_epsilon_0 = const()[name = string("norm_77_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_77_cast_fp16 = rsqrt(epsilon = norm_77_epsilon_0, x = var_1876_cast_fp16)[name = string("norm_77_cast_fp16")]; tensor var_1878_cast_fp16 = mul(x = x_243_cast_fp16, y = norm_77_cast_fp16)[name = string("op_1878_cast_fp16")]; tensor layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853248)))]; tensor var_1879_cast_fp16 = mul(x = var_1878_cast_fp16, y = layers_9_self_attn_k_norm_weight_to_fp16)[name = string("op_1879_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_1868_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_1868_cast_fp16)[name = string("x2_37_cast_fp16")]; tensor var_1897_cast_fp16 = mul(x = x1_37_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1897_cast_fp16")]; tensor var_1898_cast_fp16 = mul(x = x2_37_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1898_cast_fp16")]; tensor var_1899_cast_fp16 = sub(x = var_1897_cast_fp16, y = var_1898_cast_fp16)[name = string("op_1899_cast_fp16")]; tensor var_1900_cast_fp16 = mul(x = x2_37_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1900_cast_fp16")]; tensor var_1901_cast_fp16 = mul(x = x1_37_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1901_cast_fp16")]; tensor var_1902_cast_fp16 = add(x = var_1900_cast_fp16, y = var_1901_cast_fp16)[name = string("op_1902_cast_fp16")]; bool q_19_interleave_0 = const()[name = string("q_19_interleave_0"), val = bool(false)]; tensor q_19_cast_fp16 = concat(axis = var_1814, interleave = q_19_interleave_0, values = (var_1899_cast_fp16, var_1902_cast_fp16))[name = string("q_19_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_1879_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_1879_cast_fp16)[name = string("x2_39_cast_fp16")]; tensor var_1921_cast_fp16 = mul(x = x1_39_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1921_cast_fp16")]; tensor var_1922_cast_fp16 = mul(x = x2_39_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1922_cast_fp16")]; tensor var_1923_cast_fp16 = sub(x = var_1921_cast_fp16, y = var_1922_cast_fp16)[name = string("op_1923_cast_fp16")]; tensor var_1924_cast_fp16 = mul(x = x2_39_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1924_cast_fp16")]; tensor var_1925_cast_fp16 = mul(x = x1_39_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1925_cast_fp16")]; tensor var_1926_cast_fp16 = add(x = var_1924_cast_fp16, y = var_1925_cast_fp16)[name = string("op_1926_cast_fp16")]; bool k_19_interleave_0 = const()[name = string("k_19_interleave_0"), val = bool(false)]; tensor k_19_cast_fp16 = concat(axis = var_1814, interleave = k_19_interleave_0, values = (var_1923_cast_fp16, var_1926_cast_fp16))[name = string("k_19_cast_fp16")]; tensor read_state_18 = read_state(input = k_cache_9)[name = string("read_state_18")]; tensor k_cache_57_cast_fp16 = mul(x = read_state_18, y = var_264_cast_fp16)[name = string("k_cache_57_cast_fp16")]; write_state(data = k_cache_57_cast_fp16, input = k_cache_9)[name = string("coreml_update_state_148_write_state")]; tensor coreml_update_state_148 = read_state(input = k_cache_9)[name = string("coreml_update_state_148")]; tensor var_1931_cast_fp16 = mul(x = k_19_cast_fp16, y = onehot_cast_fp16)[name = string("op_1931_cast_fp16")]; tensor k_cache_59_cast_fp16 = add(x = coreml_update_state_148, y = var_1931_cast_fp16)[name = string("k_cache_59_cast_fp16")]; write_state(data = k_cache_59_cast_fp16, input = k_cache_9)[name = string("coreml_update_state_149_write_state")]; tensor coreml_update_state_149 = read_state(input = k_cache_9)[name = string("coreml_update_state_149")]; tensor read_state_19 = read_state(input = v_cache_9)[name = string("read_state_19")]; tensor v_cache_57_cast_fp16 = mul(x = read_state_19, y = var_264_cast_fp16)[name = string("v_cache_57_cast_fp16")]; write_state(data = v_cache_57_cast_fp16, input = v_cache_9)[name = string("coreml_update_state_150_write_state")]; tensor coreml_update_state_150 = read_state(input = v_cache_9)[name = string("coreml_update_state_150")]; tensor v_19_cast_fp16 = transpose(perm = v_19_perm_0, x = var_1857_cast_fp16)[name = string("transpose_73")]; tensor var_1935_cast_fp16 = mul(x = v_19_cast_fp16, y = onehot_cast_fp16)[name = string("op_1935_cast_fp16")]; tensor v_cache_59_cast_fp16 = add(x = coreml_update_state_150, y = var_1935_cast_fp16)[name = string("v_cache_59_cast_fp16")]; write_state(data = v_cache_59_cast_fp16, input = v_cache_9)[name = string("coreml_update_state_151_write_state")]; tensor coreml_update_state_151 = read_state(input = v_cache_9)[name = string("coreml_update_state_151")]; tensor var_1937_axes_0 = const()[name = string("op_1937_axes_0"), val = tensor([2])]; tensor var_1937_cast_fp16 = expand_dims(axes = var_1937_axes_0, x = coreml_update_state_149)[name = string("op_1937_cast_fp16")]; tensor k_exp_37_reps_0 = const()[name = string("k_exp_37_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_37_cast_fp16 = tile(reps = k_exp_37_reps_0, x = var_1937_cast_fp16)[name = string("k_exp_37_cast_fp16")]; tensor var_1940 = const()[name = string("op_1940"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_39_cast_fp16 = reshape(shape = var_1940, x = k_exp_37_cast_fp16)[name = string("k_exp_39_cast_fp16")]; tensor var_1942_axes_0 = const()[name = string("op_1942_axes_0"), val = tensor([2])]; tensor var_1942_cast_fp16 = expand_dims(axes = var_1942_axes_0, x = coreml_update_state_151)[name = string("op_1942_cast_fp16")]; tensor v_exp_37_reps_0 = const()[name = string("v_exp_37_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_37_cast_fp16 = tile(reps = v_exp_37_reps_0, x = var_1942_cast_fp16)[name = string("v_exp_37_cast_fp16")]; tensor var_1945 = const()[name = string("op_1945"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_39_cast_fp16 = reshape(shape = var_1945, x = v_exp_37_cast_fp16)[name = string("v_exp_39_cast_fp16")]; bool var_1948_transpose_x_1 = const()[name = string("op_1948_transpose_x_1"), val = bool(false)]; bool var_1948_transpose_y_1 = const()[name = string("op_1948_transpose_y_1"), val = bool(true)]; tensor var_1948_cast_fp16 = matmul(transpose_x = var_1948_transpose_x_1, transpose_y = var_1948_transpose_y_1, x = q_19_cast_fp16, y = k_exp_39_cast_fp16)[name = string("op_1948_cast_fp16")]; fp16 var_1949_to_fp16 = const()[name = string("op_1949_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_37_cast_fp16 = mul(x = var_1948_cast_fp16, y = var_1949_to_fp16)[name = string("attn_37_cast_fp16")]; tensor input_91_cast_fp16 = add(x = attn_37_cast_fp16, y = attention_mask_to_fp16)[name = string("input_91_cast_fp16")]; tensor attn_39_cast_fp16 = softmax(axis = var_1814, x = input_91_cast_fp16)[name = string("attn_39_cast_fp16")]; bool out_19_transpose_x_0 = const()[name = string("out_19_transpose_x_0"), val = bool(false)]; bool out_19_transpose_y_0 = const()[name = string("out_19_transpose_y_0"), val = bool(false)]; tensor out_19_cast_fp16 = matmul(transpose_x = out_19_transpose_x_0, transpose_y = out_19_transpose_y_0, x = attn_39_cast_fp16, y = v_exp_39_cast_fp16)[name = string("out_19_cast_fp16")]; tensor var_1954_perm_0 = const()[name = string("op_1954_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1955 = const()[name = string("op_1955"), val = tensor([1, 1, -1])]; tensor var_1954_cast_fp16 = transpose(perm = var_1954_perm_0, x = out_19_cast_fp16)[name = string("transpose_72")]; tensor input_93_cast_fp16 = reshape(shape = var_1955, x = var_1954_cast_fp16)[name = string("input_93_cast_fp16")]; tensor layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147950784))))[name = string("layers_9_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor x_253_cast_fp16 = add(x = x_233_cast_fp16, y = linear_66_cast_fp16)[name = string("x_253_cast_fp16")]; fp16 var_1813_promoted_3_to_fp16 = const()[name = string("op_1813_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1962_cast_fp16 = pow(x = x_253_cast_fp16, y = var_1813_promoted_3_to_fp16)[name = string("op_1962_cast_fp16")]; tensor var_1964_axes_0 = const()[name = string("op_1964_axes_0"), val = tensor([-1])]; bool var_1964_keep_dims_0 = const()[name = string("op_1964_keep_dims_0"), val = bool(true)]; tensor var_1964_cast_fp16 = reduce_mean(axes = var_1964_axes_0, keep_dims = var_1964_keep_dims_0, x = var_1962_cast_fp16)[name = string("op_1964_cast_fp16")]; fp16 var_1965_to_fp16 = const()[name = string("op_1965_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1966_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1965_to_fp16)[name = string("op_1966_cast_fp16")]; fp32 norm_79_epsilon_0 = const()[name = string("norm_79_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_79_cast_fp16 = rsqrt(epsilon = norm_79_epsilon_0, x = var_1966_cast_fp16)[name = string("norm_79_cast_fp16")]; tensor var_1968_cast_fp16 = mul(x = x_253_cast_fp16, y = norm_79_cast_fp16)[name = string("op_1968_cast_fp16")]; tensor layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147951360)))]; tensor var_1969_cast_fp16 = mul(x = var_1968_cast_fp16, y = layers_9_post_attention_layernorm_weight_to_fp16)[name = string("op_1969_cast_fp16")]; tensor layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147953472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099264))))[name = string("layers_9_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = var_1969_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_1979_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_1979_cast_fp16")]; tensor layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154245632))))[name = string("layers_9_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_up_proj_weight_to_fp16_palettized, x = var_1969_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_99_cast_fp16 = mul(x = var_1979_cast_fp16, y = linear_68_cast_fp16)[name = string("input_99_cast_fp16")]; tensor layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154246208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392000))))[name = string("layers_9_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_69_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_1999 = const()[name = string("op_1999"), val = int32(-1)]; fp16 var_1998_promoted_to_fp16 = const()[name = string("op_1998_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2008_cast_fp16 = pow(x = x_259_cast_fp16, y = var_1998_promoted_to_fp16)[name = string("op_2008_cast_fp16")]; tensor var_2010_axes_0 = const()[name = string("op_2010_axes_0"), val = tensor([-1])]; bool var_2010_keep_dims_0 = const()[name = string("op_2010_keep_dims_0"), val = bool(true)]; tensor var_2010_cast_fp16 = reduce_mean(axes = var_2010_axes_0, keep_dims = var_2010_keep_dims_0, x = var_2008_cast_fp16)[name = string("op_2010_cast_fp16")]; fp16 var_2011_to_fp16 = const()[name = string("op_2011_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2012_cast_fp16 = add(x = var_2010_cast_fp16, y = var_2011_to_fp16)[name = string("op_2012_cast_fp16")]; fp32 norm_81_epsilon_0 = const()[name = string("norm_81_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_81_cast_fp16 = rsqrt(epsilon = norm_81_epsilon_0, x = var_2012_cast_fp16)[name = string("norm_81_cast_fp16")]; tensor var_2014_cast_fp16 = mul(x = x_259_cast_fp16, y = norm_81_cast_fp16)[name = string("op_2014_cast_fp16")]; tensor layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392576)))]; tensor var_2015_cast_fp16 = mul(x = var_2014_cast_fp16, y = layers_10_input_layernorm_weight_to_fp16)[name = string("op_2015_cast_fp16")]; tensor layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157394688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159491904))))[name = string("layers_10_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = var_2015_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor var_2031 = const()[name = string("op_2031"), val = tensor([1, 1, 16, 128])]; tensor var_2032_cast_fp16 = reshape(shape = var_2031, x = linear_70_cast_fp16)[name = string("op_2032_cast_fp16")]; tensor x_265_perm_0 = const()[name = string("x_265_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159492480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541120))))[name = string("layers_10_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = var_2015_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor var_2036 = const()[name = string("op_2036"), val = tensor([1, 1, 8, 128])]; tensor var_2037_cast_fp16 = reshape(shape = var_2036, x = linear_71_cast_fp16)[name = string("op_2037_cast_fp16")]; tensor x_269_perm_0 = const()[name = string("x_269_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590336))))[name = string("layers_10_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = var_2015_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor var_2041 = const()[name = string("op_2041"), val = tensor([1, 1, 8, 128])]; tensor var_2042_cast_fp16 = reshape(shape = var_2041, x = linear_72_cast_fp16)[name = string("op_2042_cast_fp16")]; tensor v_21_perm_0 = const()[name = string("v_21_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_1998_promoted_1_to_fp16 = const()[name = string("op_1998_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_265_cast_fp16 = transpose(perm = x_265_perm_0, x = var_2032_cast_fp16)[name = string("transpose_71")]; tensor var_2046_cast_fp16 = pow(x = x_265_cast_fp16, y = var_1998_promoted_1_to_fp16)[name = string("op_2046_cast_fp16")]; tensor var_2048_axes_0 = const()[name = string("op_2048_axes_0"), val = tensor([-1])]; bool var_2048_keep_dims_0 = const()[name = string("op_2048_keep_dims_0"), val = bool(true)]; tensor var_2048_cast_fp16 = reduce_mean(axes = var_2048_axes_0, keep_dims = var_2048_keep_dims_0, x = var_2046_cast_fp16)[name = string("op_2048_cast_fp16")]; fp16 var_2049_to_fp16 = const()[name = string("op_2049_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2050_cast_fp16 = add(x = var_2048_cast_fp16, y = var_2049_to_fp16)[name = string("op_2050_cast_fp16")]; fp32 norm_83_epsilon_0 = const()[name = string("norm_83_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_83_cast_fp16 = rsqrt(epsilon = norm_83_epsilon_0, x = var_2050_cast_fp16)[name = string("norm_83_cast_fp16")]; tensor var_2052_cast_fp16 = mul(x = x_265_cast_fp16, y = norm_83_cast_fp16)[name = string("op_2052_cast_fp16")]; tensor layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590912)))]; tensor var_2053_cast_fp16 = mul(x = var_2052_cast_fp16, y = layers_10_self_attn_q_norm_weight_to_fp16)[name = string("op_2053_cast_fp16")]; fp16 var_1998_promoted_2_to_fp16 = const()[name = string("op_1998_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_269_cast_fp16 = transpose(perm = x_269_perm_0, x = var_2037_cast_fp16)[name = string("transpose_70")]; tensor var_2057_cast_fp16 = pow(x = x_269_cast_fp16, y = var_1998_promoted_2_to_fp16)[name = string("op_2057_cast_fp16")]; tensor var_2059_axes_0 = const()[name = string("op_2059_axes_0"), val = tensor([-1])]; bool var_2059_keep_dims_0 = const()[name = string("op_2059_keep_dims_0"), val = bool(true)]; tensor var_2059_cast_fp16 = reduce_mean(axes = var_2059_axes_0, keep_dims = var_2059_keep_dims_0, x = var_2057_cast_fp16)[name = string("op_2059_cast_fp16")]; fp16 var_2060_to_fp16 = const()[name = string("op_2060_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2061_cast_fp16 = add(x = var_2059_cast_fp16, y = var_2060_to_fp16)[name = string("op_2061_cast_fp16")]; fp32 norm_85_epsilon_0 = const()[name = string("norm_85_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_85_cast_fp16 = rsqrt(epsilon = norm_85_epsilon_0, x = var_2061_cast_fp16)[name = string("norm_85_cast_fp16")]; tensor var_2063_cast_fp16 = mul(x = x_269_cast_fp16, y = norm_85_cast_fp16)[name = string("op_2063_cast_fp16")]; tensor layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591232)))]; tensor var_2064_cast_fp16 = mul(x = var_2063_cast_fp16, y = layers_10_self_attn_k_norm_weight_to_fp16)[name = string("op_2064_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_2053_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_2053_cast_fp16)[name = string("x2_41_cast_fp16")]; tensor var_2082_cast_fp16 = mul(x = x1_41_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2082_cast_fp16")]; tensor var_2083_cast_fp16 = mul(x = x2_41_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2083_cast_fp16")]; tensor var_2084_cast_fp16 = sub(x = var_2082_cast_fp16, y = var_2083_cast_fp16)[name = string("op_2084_cast_fp16")]; tensor var_2085_cast_fp16 = mul(x = x2_41_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2085_cast_fp16")]; tensor var_2086_cast_fp16 = mul(x = x1_41_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2086_cast_fp16")]; tensor var_2087_cast_fp16 = add(x = var_2085_cast_fp16, y = var_2086_cast_fp16)[name = string("op_2087_cast_fp16")]; bool q_21_interleave_0 = const()[name = string("q_21_interleave_0"), val = bool(false)]; tensor q_21_cast_fp16 = concat(axis = var_1999, interleave = q_21_interleave_0, values = (var_2084_cast_fp16, var_2087_cast_fp16))[name = string("q_21_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_2064_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_2064_cast_fp16)[name = string("x2_43_cast_fp16")]; tensor var_2106_cast_fp16 = mul(x = x1_43_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2106_cast_fp16")]; tensor var_2107_cast_fp16 = mul(x = x2_43_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2107_cast_fp16")]; tensor var_2108_cast_fp16 = sub(x = var_2106_cast_fp16, y = var_2107_cast_fp16)[name = string("op_2108_cast_fp16")]; tensor var_2109_cast_fp16 = mul(x = x2_43_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2109_cast_fp16")]; tensor var_2110_cast_fp16 = mul(x = x1_43_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2110_cast_fp16")]; tensor var_2111_cast_fp16 = add(x = var_2109_cast_fp16, y = var_2110_cast_fp16)[name = string("op_2111_cast_fp16")]; bool k_21_interleave_0 = const()[name = string("k_21_interleave_0"), val = bool(false)]; tensor k_21_cast_fp16 = concat(axis = var_1999, interleave = k_21_interleave_0, values = (var_2108_cast_fp16, var_2111_cast_fp16))[name = string("k_21_cast_fp16")]; tensor read_state_20 = read_state(input = k_cache_10)[name = string("read_state_20")]; tensor k_cache_63_cast_fp16 = mul(x = read_state_20, y = var_264_cast_fp16)[name = string("k_cache_63_cast_fp16")]; write_state(data = k_cache_63_cast_fp16, input = k_cache_10)[name = string("coreml_update_state_152_write_state")]; tensor coreml_update_state_152 = read_state(input = k_cache_10)[name = string("coreml_update_state_152")]; tensor var_2116_cast_fp16 = mul(x = k_21_cast_fp16, y = onehot_cast_fp16)[name = string("op_2116_cast_fp16")]; tensor k_cache_65_cast_fp16 = add(x = coreml_update_state_152, y = var_2116_cast_fp16)[name = string("k_cache_65_cast_fp16")]; write_state(data = k_cache_65_cast_fp16, input = k_cache_10)[name = string("coreml_update_state_153_write_state")]; tensor coreml_update_state_153 = read_state(input = k_cache_10)[name = string("coreml_update_state_153")]; tensor read_state_21 = read_state(input = v_cache_10)[name = string("read_state_21")]; tensor v_cache_63_cast_fp16 = mul(x = read_state_21, y = var_264_cast_fp16)[name = string("v_cache_63_cast_fp16")]; write_state(data = v_cache_63_cast_fp16, input = v_cache_10)[name = string("coreml_update_state_154_write_state")]; tensor coreml_update_state_154 = read_state(input = v_cache_10)[name = string("coreml_update_state_154")]; tensor v_21_cast_fp16 = transpose(perm = v_21_perm_0, x = var_2042_cast_fp16)[name = string("transpose_69")]; tensor var_2120_cast_fp16 = mul(x = v_21_cast_fp16, y = onehot_cast_fp16)[name = string("op_2120_cast_fp16")]; tensor v_cache_65_cast_fp16 = add(x = coreml_update_state_154, y = var_2120_cast_fp16)[name = string("v_cache_65_cast_fp16")]; write_state(data = v_cache_65_cast_fp16, input = v_cache_10)[name = string("coreml_update_state_155_write_state")]; tensor coreml_update_state_155 = read_state(input = v_cache_10)[name = string("coreml_update_state_155")]; tensor var_2122_axes_0 = const()[name = string("op_2122_axes_0"), val = tensor([2])]; tensor var_2122_cast_fp16 = expand_dims(axes = var_2122_axes_0, x = coreml_update_state_153)[name = string("op_2122_cast_fp16")]; tensor k_exp_41_reps_0 = const()[name = string("k_exp_41_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_41_cast_fp16 = tile(reps = k_exp_41_reps_0, x = var_2122_cast_fp16)[name = string("k_exp_41_cast_fp16")]; tensor var_2125 = const()[name = string("op_2125"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_43_cast_fp16 = reshape(shape = var_2125, x = k_exp_41_cast_fp16)[name = string("k_exp_43_cast_fp16")]; tensor var_2127_axes_0 = const()[name = string("op_2127_axes_0"), val = tensor([2])]; tensor var_2127_cast_fp16 = expand_dims(axes = var_2127_axes_0, x = coreml_update_state_155)[name = string("op_2127_cast_fp16")]; tensor v_exp_41_reps_0 = const()[name = string("v_exp_41_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_41_cast_fp16 = tile(reps = v_exp_41_reps_0, x = var_2127_cast_fp16)[name = string("v_exp_41_cast_fp16")]; tensor var_2130 = const()[name = string("op_2130"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_43_cast_fp16 = reshape(shape = var_2130, x = v_exp_41_cast_fp16)[name = string("v_exp_43_cast_fp16")]; bool var_2133_transpose_x_1 = const()[name = string("op_2133_transpose_x_1"), val = bool(false)]; bool var_2133_transpose_y_1 = const()[name = string("op_2133_transpose_y_1"), val = bool(true)]; tensor var_2133_cast_fp16 = matmul(transpose_x = var_2133_transpose_x_1, transpose_y = var_2133_transpose_y_1, x = q_21_cast_fp16, y = k_exp_43_cast_fp16)[name = string("op_2133_cast_fp16")]; fp16 var_2134_to_fp16 = const()[name = string("op_2134_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_41_cast_fp16 = mul(x = var_2133_cast_fp16, y = var_2134_to_fp16)[name = string("attn_41_cast_fp16")]; tensor input_101_cast_fp16 = add(x = attn_41_cast_fp16, y = attention_mask_to_fp16)[name = string("input_101_cast_fp16")]; tensor attn_43_cast_fp16 = softmax(axis = var_1999, x = input_101_cast_fp16)[name = string("attn_43_cast_fp16")]; bool out_21_transpose_x_0 = const()[name = string("out_21_transpose_x_0"), val = bool(false)]; bool out_21_transpose_y_0 = const()[name = string("out_21_transpose_y_0"), val = bool(false)]; tensor out_21_cast_fp16 = matmul(transpose_x = out_21_transpose_x_0, transpose_y = out_21_transpose_y_0, x = attn_43_cast_fp16, y = v_exp_43_cast_fp16)[name = string("out_21_cast_fp16")]; tensor var_2139_perm_0 = const()[name = string("op_2139_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2140 = const()[name = string("op_2140"), val = tensor([1, 1, -1])]; tensor var_2139_cast_fp16 = transpose(perm = var_2139_perm_0, x = out_21_cast_fp16)[name = string("transpose_68")]; tensor input_103_cast_fp16 = reshape(shape = var_2140, x = var_2139_cast_fp16)[name = string("input_103_cast_fp16")]; tensor layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163688768))))[name = string("layers_10_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor x_279_cast_fp16 = add(x = x_259_cast_fp16, y = linear_73_cast_fp16)[name = string("x_279_cast_fp16")]; fp16 var_1998_promoted_3_to_fp16 = const()[name = string("op_1998_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2147_cast_fp16 = pow(x = x_279_cast_fp16, y = var_1998_promoted_3_to_fp16)[name = string("op_2147_cast_fp16")]; tensor var_2149_axes_0 = const()[name = string("op_2149_axes_0"), val = tensor([-1])]; bool var_2149_keep_dims_0 = const()[name = string("op_2149_keep_dims_0"), val = bool(true)]; tensor var_2149_cast_fp16 = reduce_mean(axes = var_2149_axes_0, keep_dims = var_2149_keep_dims_0, x = var_2147_cast_fp16)[name = string("op_2149_cast_fp16")]; fp16 var_2150_to_fp16 = const()[name = string("op_2150_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2151_cast_fp16 = add(x = var_2149_cast_fp16, y = var_2150_to_fp16)[name = string("op_2151_cast_fp16")]; fp32 norm_87_epsilon_0 = const()[name = string("norm_87_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_87_cast_fp16 = rsqrt(epsilon = norm_87_epsilon_0, x = var_2151_cast_fp16)[name = string("norm_87_cast_fp16")]; tensor var_2153_cast_fp16 = mul(x = x_279_cast_fp16, y = norm_87_cast_fp16)[name = string("op_2153_cast_fp16")]; tensor layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689344)))]; tensor var_2154_cast_fp16 = mul(x = var_2153_cast_fp16, y = layers_10_post_attention_layernorm_weight_to_fp16)[name = string("op_2154_cast_fp16")]; tensor layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163691456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837248))))[name = string("layers_10_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = var_2154_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2164_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2164_cast_fp16")]; tensor layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169983616))))[name = string("layers_10_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_up_proj_weight_to_fp16_palettized, x = var_2154_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_109_cast_fp16 = mul(x = var_2164_cast_fp16, y = linear_75_cast_fp16)[name = string("input_109_cast_fp16")]; tensor layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169984192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173129984))))[name = string("layers_10_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_76_cast_fp16)[name = string("x_285_cast_fp16")]; int32 var_2184 = const()[name = string("op_2184"), val = int32(-1)]; fp16 var_2183_promoted_to_fp16 = const()[name = string("op_2183_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2193_cast_fp16 = pow(x = x_285_cast_fp16, y = var_2183_promoted_to_fp16)[name = string("op_2193_cast_fp16")]; tensor var_2195_axes_0 = const()[name = string("op_2195_axes_0"), val = tensor([-1])]; bool var_2195_keep_dims_0 = const()[name = string("op_2195_keep_dims_0"), val = bool(true)]; tensor var_2195_cast_fp16 = reduce_mean(axes = var_2195_axes_0, keep_dims = var_2195_keep_dims_0, x = var_2193_cast_fp16)[name = string("op_2195_cast_fp16")]; fp16 var_2196_to_fp16 = const()[name = string("op_2196_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2197_cast_fp16 = add(x = var_2195_cast_fp16, y = var_2196_to_fp16)[name = string("op_2197_cast_fp16")]; fp32 norm_89_epsilon_0 = const()[name = string("norm_89_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_89_cast_fp16 = rsqrt(epsilon = norm_89_epsilon_0, x = var_2197_cast_fp16)[name = string("norm_89_cast_fp16")]; tensor var_2199_cast_fp16 = mul(x = x_285_cast_fp16, y = norm_89_cast_fp16)[name = string("op_2199_cast_fp16")]; tensor layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173130560)))]; tensor var_2200_cast_fp16 = mul(x = var_2199_cast_fp16, y = layers_11_input_layernorm_weight_to_fp16)[name = string("op_2200_cast_fp16")]; tensor layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173132672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175229888))))[name = string("layers_11_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = var_2200_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor var_2216 = const()[name = string("op_2216"), val = tensor([1, 1, 16, 128])]; tensor var_2217_cast_fp16 = reshape(shape = var_2216, x = linear_77_cast_fp16)[name = string("op_2217_cast_fp16")]; tensor x_291_perm_0 = const()[name = string("x_291_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175230464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279104))))[name = string("layers_11_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = var_2200_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor var_2221 = const()[name = string("op_2221"), val = tensor([1, 1, 8, 128])]; tensor var_2222_cast_fp16 = reshape(shape = var_2221, x = linear_78_cast_fp16)[name = string("op_2222_cast_fp16")]; tensor x_295_perm_0 = const()[name = string("x_295_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328320))))[name = string("layers_11_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = var_2200_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor var_2226 = const()[name = string("op_2226"), val = tensor([1, 1, 8, 128])]; tensor var_2227_cast_fp16 = reshape(shape = var_2226, x = linear_79_cast_fp16)[name = string("op_2227_cast_fp16")]; tensor v_23_perm_0 = const()[name = string("v_23_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_2183_promoted_1_to_fp16 = const()[name = string("op_2183_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_291_cast_fp16 = transpose(perm = x_291_perm_0, x = var_2217_cast_fp16)[name = string("transpose_67")]; tensor var_2231_cast_fp16 = pow(x = x_291_cast_fp16, y = var_2183_promoted_1_to_fp16)[name = string("op_2231_cast_fp16")]; tensor var_2233_axes_0 = const()[name = string("op_2233_axes_0"), val = tensor([-1])]; bool var_2233_keep_dims_0 = const()[name = string("op_2233_keep_dims_0"), val = bool(true)]; tensor var_2233_cast_fp16 = reduce_mean(axes = var_2233_axes_0, keep_dims = var_2233_keep_dims_0, x = var_2231_cast_fp16)[name = string("op_2233_cast_fp16")]; fp16 var_2234_to_fp16 = const()[name = string("op_2234_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2235_cast_fp16 = add(x = var_2233_cast_fp16, y = var_2234_to_fp16)[name = string("op_2235_cast_fp16")]; fp32 norm_91_epsilon_0 = const()[name = string("norm_91_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_91_cast_fp16 = rsqrt(epsilon = norm_91_epsilon_0, x = var_2235_cast_fp16)[name = string("norm_91_cast_fp16")]; tensor var_2237_cast_fp16 = mul(x = x_291_cast_fp16, y = norm_91_cast_fp16)[name = string("op_2237_cast_fp16")]; tensor layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328896)))]; tensor var_2238_cast_fp16 = mul(x = var_2237_cast_fp16, y = layers_11_self_attn_q_norm_weight_to_fp16)[name = string("op_2238_cast_fp16")]; fp16 var_2183_promoted_2_to_fp16 = const()[name = string("op_2183_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_295_cast_fp16 = transpose(perm = x_295_perm_0, x = var_2222_cast_fp16)[name = string("transpose_66")]; tensor var_2242_cast_fp16 = pow(x = x_295_cast_fp16, y = var_2183_promoted_2_to_fp16)[name = string("op_2242_cast_fp16")]; tensor var_2244_axes_0 = const()[name = string("op_2244_axes_0"), val = tensor([-1])]; bool var_2244_keep_dims_0 = const()[name = string("op_2244_keep_dims_0"), val = bool(true)]; tensor var_2244_cast_fp16 = reduce_mean(axes = var_2244_axes_0, keep_dims = var_2244_keep_dims_0, x = var_2242_cast_fp16)[name = string("op_2244_cast_fp16")]; fp16 var_2245_to_fp16 = const()[name = string("op_2245_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2246_cast_fp16 = add(x = var_2244_cast_fp16, y = var_2245_to_fp16)[name = string("op_2246_cast_fp16")]; fp32 norm_93_epsilon_0 = const()[name = string("norm_93_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_93_cast_fp16 = rsqrt(epsilon = norm_93_epsilon_0, x = var_2246_cast_fp16)[name = string("norm_93_cast_fp16")]; tensor var_2248_cast_fp16 = mul(x = x_295_cast_fp16, y = norm_93_cast_fp16)[name = string("op_2248_cast_fp16")]; tensor layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329216)))]; tensor var_2249_cast_fp16 = mul(x = var_2248_cast_fp16, y = layers_11_self_attn_k_norm_weight_to_fp16)[name = string("op_2249_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_2238_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_2238_cast_fp16)[name = string("x2_45_cast_fp16")]; tensor var_2267_cast_fp16 = mul(x = x1_45_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2267_cast_fp16")]; tensor var_2268_cast_fp16 = mul(x = x2_45_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2268_cast_fp16")]; tensor var_2269_cast_fp16 = sub(x = var_2267_cast_fp16, y = var_2268_cast_fp16)[name = string("op_2269_cast_fp16")]; tensor var_2270_cast_fp16 = mul(x = x2_45_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2270_cast_fp16")]; tensor var_2271_cast_fp16 = mul(x = x1_45_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2271_cast_fp16")]; tensor var_2272_cast_fp16 = add(x = var_2270_cast_fp16, y = var_2271_cast_fp16)[name = string("op_2272_cast_fp16")]; bool q_23_interleave_0 = const()[name = string("q_23_interleave_0"), val = bool(false)]; tensor q_23_cast_fp16 = concat(axis = var_2184, interleave = q_23_interleave_0, values = (var_2269_cast_fp16, var_2272_cast_fp16))[name = string("q_23_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_2249_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_2249_cast_fp16)[name = string("x2_47_cast_fp16")]; tensor var_2291_cast_fp16 = mul(x = x1_47_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2291_cast_fp16")]; tensor var_2292_cast_fp16 = mul(x = x2_47_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2292_cast_fp16")]; tensor var_2293_cast_fp16 = sub(x = var_2291_cast_fp16, y = var_2292_cast_fp16)[name = string("op_2293_cast_fp16")]; tensor var_2294_cast_fp16 = mul(x = x2_47_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2294_cast_fp16")]; tensor var_2295_cast_fp16 = mul(x = x1_47_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2295_cast_fp16")]; tensor var_2296_cast_fp16 = add(x = var_2294_cast_fp16, y = var_2295_cast_fp16)[name = string("op_2296_cast_fp16")]; bool k_23_interleave_0 = const()[name = string("k_23_interleave_0"), val = bool(false)]; tensor k_23_cast_fp16 = concat(axis = var_2184, interleave = k_23_interleave_0, values = (var_2293_cast_fp16, var_2296_cast_fp16))[name = string("k_23_cast_fp16")]; tensor read_state_22 = read_state(input = k_cache_11)[name = string("read_state_22")]; tensor k_cache_69_cast_fp16 = mul(x = read_state_22, y = var_264_cast_fp16)[name = string("k_cache_69_cast_fp16")]; write_state(data = k_cache_69_cast_fp16, input = k_cache_11)[name = string("coreml_update_state_156_write_state")]; tensor coreml_update_state_156 = read_state(input = k_cache_11)[name = string("coreml_update_state_156")]; tensor var_2301_cast_fp16 = mul(x = k_23_cast_fp16, y = onehot_cast_fp16)[name = string("op_2301_cast_fp16")]; tensor k_cache_71_cast_fp16 = add(x = coreml_update_state_156, y = var_2301_cast_fp16)[name = string("k_cache_71_cast_fp16")]; write_state(data = k_cache_71_cast_fp16, input = k_cache_11)[name = string("coreml_update_state_157_write_state")]; tensor coreml_update_state_157 = read_state(input = k_cache_11)[name = string("coreml_update_state_157")]; tensor read_state_23 = read_state(input = v_cache_11)[name = string("read_state_23")]; tensor v_cache_69_cast_fp16 = mul(x = read_state_23, y = var_264_cast_fp16)[name = string("v_cache_69_cast_fp16")]; write_state(data = v_cache_69_cast_fp16, input = v_cache_11)[name = string("coreml_update_state_158_write_state")]; tensor coreml_update_state_158 = read_state(input = v_cache_11)[name = string("coreml_update_state_158")]; tensor v_23_cast_fp16 = transpose(perm = v_23_perm_0, x = var_2227_cast_fp16)[name = string("transpose_65")]; tensor var_2305_cast_fp16 = mul(x = v_23_cast_fp16, y = onehot_cast_fp16)[name = string("op_2305_cast_fp16")]; tensor v_cache_71_cast_fp16 = add(x = coreml_update_state_158, y = var_2305_cast_fp16)[name = string("v_cache_71_cast_fp16")]; write_state(data = v_cache_71_cast_fp16, input = v_cache_11)[name = string("coreml_update_state_159_write_state")]; tensor coreml_update_state_159 = read_state(input = v_cache_11)[name = string("coreml_update_state_159")]; tensor var_2307_axes_0 = const()[name = string("op_2307_axes_0"), val = tensor([2])]; tensor var_2307_cast_fp16 = expand_dims(axes = var_2307_axes_0, x = coreml_update_state_157)[name = string("op_2307_cast_fp16")]; tensor k_exp_45_reps_0 = const()[name = string("k_exp_45_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_45_cast_fp16 = tile(reps = k_exp_45_reps_0, x = var_2307_cast_fp16)[name = string("k_exp_45_cast_fp16")]; tensor var_2310 = const()[name = string("op_2310"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_47_cast_fp16 = reshape(shape = var_2310, x = k_exp_45_cast_fp16)[name = string("k_exp_47_cast_fp16")]; tensor var_2312_axes_0 = const()[name = string("op_2312_axes_0"), val = tensor([2])]; tensor var_2312_cast_fp16 = expand_dims(axes = var_2312_axes_0, x = coreml_update_state_159)[name = string("op_2312_cast_fp16")]; tensor v_exp_45_reps_0 = const()[name = string("v_exp_45_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_45_cast_fp16 = tile(reps = v_exp_45_reps_0, x = var_2312_cast_fp16)[name = string("v_exp_45_cast_fp16")]; tensor var_2315 = const()[name = string("op_2315"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_47_cast_fp16 = reshape(shape = var_2315, x = v_exp_45_cast_fp16)[name = string("v_exp_47_cast_fp16")]; bool var_2318_transpose_x_1 = const()[name = string("op_2318_transpose_x_1"), val = bool(false)]; bool var_2318_transpose_y_1 = const()[name = string("op_2318_transpose_y_1"), val = bool(true)]; tensor var_2318_cast_fp16 = matmul(transpose_x = var_2318_transpose_x_1, transpose_y = var_2318_transpose_y_1, x = q_23_cast_fp16, y = k_exp_47_cast_fp16)[name = string("op_2318_cast_fp16")]; fp16 var_2319_to_fp16 = const()[name = string("op_2319_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_45_cast_fp16 = mul(x = var_2318_cast_fp16, y = var_2319_to_fp16)[name = string("attn_45_cast_fp16")]; tensor input_111_cast_fp16 = add(x = attn_45_cast_fp16, y = attention_mask_to_fp16)[name = string("input_111_cast_fp16")]; tensor attn_47_cast_fp16 = softmax(axis = var_2184, x = input_111_cast_fp16)[name = string("attn_47_cast_fp16")]; bool out_23_transpose_x_0 = const()[name = string("out_23_transpose_x_0"), val = bool(false)]; bool out_23_transpose_y_0 = const()[name = string("out_23_transpose_y_0"), val = bool(false)]; tensor out_23_cast_fp16 = matmul(transpose_x = out_23_transpose_x_0, transpose_y = out_23_transpose_y_0, x = attn_47_cast_fp16, y = v_exp_47_cast_fp16)[name = string("out_23_cast_fp16")]; tensor var_2324_perm_0 = const()[name = string("op_2324_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2325 = const()[name = string("op_2325"), val = tensor([1, 1, -1])]; tensor var_2324_cast_fp16 = transpose(perm = var_2324_perm_0, x = out_23_cast_fp16)[name = string("transpose_64")]; tensor input_113_cast_fp16 = reshape(shape = var_2325, x = var_2324_cast_fp16)[name = string("input_113_cast_fp16")]; tensor layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179426752))))[name = string("layers_11_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_285_cast_fp16, y = linear_80_cast_fp16)[name = string("x_305_cast_fp16")]; fp16 var_2183_promoted_3_to_fp16 = const()[name = string("op_2183_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2332_cast_fp16 = pow(x = x_305_cast_fp16, y = var_2183_promoted_3_to_fp16)[name = string("op_2332_cast_fp16")]; tensor var_2334_axes_0 = const()[name = string("op_2334_axes_0"), val = tensor([-1])]; bool var_2334_keep_dims_0 = const()[name = string("op_2334_keep_dims_0"), val = bool(true)]; tensor var_2334_cast_fp16 = reduce_mean(axes = var_2334_axes_0, keep_dims = var_2334_keep_dims_0, x = var_2332_cast_fp16)[name = string("op_2334_cast_fp16")]; fp16 var_2335_to_fp16 = const()[name = string("op_2335_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2336_cast_fp16 = add(x = var_2334_cast_fp16, y = var_2335_to_fp16)[name = string("op_2336_cast_fp16")]; fp32 norm_95_epsilon_0 = const()[name = string("norm_95_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_95_cast_fp16 = rsqrt(epsilon = norm_95_epsilon_0, x = var_2336_cast_fp16)[name = string("norm_95_cast_fp16")]; tensor var_2338_cast_fp16 = mul(x = x_305_cast_fp16, y = norm_95_cast_fp16)[name = string("op_2338_cast_fp16")]; tensor layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427328)))]; tensor var_2339_cast_fp16 = mul(x = var_2338_cast_fp16, y = layers_11_post_attention_layernorm_weight_to_fp16)[name = string("op_2339_cast_fp16")]; tensor layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179429440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575232))))[name = string("layers_11_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = var_2339_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2349_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2349_cast_fp16")]; tensor layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185721600))))[name = string("layers_11_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_up_proj_weight_to_fp16_palettized, x = var_2339_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_119_cast_fp16 = mul(x = var_2349_cast_fp16, y = linear_82_cast_fp16)[name = string("input_119_cast_fp16")]; tensor layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185722176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188867968))))[name = string("layers_11_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor x_311_cast_fp16 = add(x = x_305_cast_fp16, y = linear_83_cast_fp16)[name = string("x_311_cast_fp16")]; int32 var_2369 = const()[name = string("op_2369"), val = int32(-1)]; fp16 var_2368_promoted_to_fp16 = const()[name = string("op_2368_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2378_cast_fp16 = pow(x = x_311_cast_fp16, y = var_2368_promoted_to_fp16)[name = string("op_2378_cast_fp16")]; tensor var_2380_axes_0 = const()[name = string("op_2380_axes_0"), val = tensor([-1])]; bool var_2380_keep_dims_0 = const()[name = string("op_2380_keep_dims_0"), val = bool(true)]; tensor var_2380_cast_fp16 = reduce_mean(axes = var_2380_axes_0, keep_dims = var_2380_keep_dims_0, x = var_2378_cast_fp16)[name = string("op_2380_cast_fp16")]; fp16 var_2381_to_fp16 = const()[name = string("op_2381_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2382_cast_fp16 = add(x = var_2380_cast_fp16, y = var_2381_to_fp16)[name = string("op_2382_cast_fp16")]; fp32 norm_97_epsilon_0 = const()[name = string("norm_97_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_97_cast_fp16 = rsqrt(epsilon = norm_97_epsilon_0, x = var_2382_cast_fp16)[name = string("norm_97_cast_fp16")]; tensor var_2384_cast_fp16 = mul(x = x_311_cast_fp16, y = norm_97_cast_fp16)[name = string("op_2384_cast_fp16")]; tensor layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188868544)))]; tensor var_2385_cast_fp16 = mul(x = var_2384_cast_fp16, y = layers_12_input_layernorm_weight_to_fp16)[name = string("op_2385_cast_fp16")]; tensor layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188870656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190967872))))[name = string("layers_12_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = var_2385_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor var_2401 = const()[name = string("op_2401"), val = tensor([1, 1, 16, 128])]; tensor var_2402_cast_fp16 = reshape(shape = var_2401, x = linear_84_cast_fp16)[name = string("op_2402_cast_fp16")]; tensor x_317_perm_0 = const()[name = string("x_317_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190968448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017088))))[name = string("layers_12_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = var_2385_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor var_2406 = const()[name = string("op_2406"), val = tensor([1, 1, 8, 128])]; tensor var_2407_cast_fp16 = reshape(shape = var_2406, x = linear_85_cast_fp16)[name = string("op_2407_cast_fp16")]; tensor x_321_perm_0 = const()[name = string("x_321_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066304))))[name = string("layers_12_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = var_2385_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor var_2411 = const()[name = string("op_2411"), val = tensor([1, 1, 8, 128])]; tensor var_2412_cast_fp16 = reshape(shape = var_2411, x = linear_86_cast_fp16)[name = string("op_2412_cast_fp16")]; tensor v_25_perm_0 = const()[name = string("v_25_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_2368_promoted_1_to_fp16 = const()[name = string("op_2368_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_317_cast_fp16 = transpose(perm = x_317_perm_0, x = var_2402_cast_fp16)[name = string("transpose_63")]; tensor var_2416_cast_fp16 = pow(x = x_317_cast_fp16, y = var_2368_promoted_1_to_fp16)[name = string("op_2416_cast_fp16")]; tensor var_2418_axes_0 = const()[name = string("op_2418_axes_0"), val = tensor([-1])]; bool var_2418_keep_dims_0 = const()[name = string("op_2418_keep_dims_0"), val = bool(true)]; tensor var_2418_cast_fp16 = reduce_mean(axes = var_2418_axes_0, keep_dims = var_2418_keep_dims_0, x = var_2416_cast_fp16)[name = string("op_2418_cast_fp16")]; fp16 var_2419_to_fp16 = const()[name = string("op_2419_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2420_cast_fp16 = add(x = var_2418_cast_fp16, y = var_2419_to_fp16)[name = string("op_2420_cast_fp16")]; fp32 norm_99_epsilon_0 = const()[name = string("norm_99_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_99_cast_fp16 = rsqrt(epsilon = norm_99_epsilon_0, x = var_2420_cast_fp16)[name = string("norm_99_cast_fp16")]; tensor var_2422_cast_fp16 = mul(x = x_317_cast_fp16, y = norm_99_cast_fp16)[name = string("op_2422_cast_fp16")]; tensor layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066880)))]; tensor var_2423_cast_fp16 = mul(x = var_2422_cast_fp16, y = layers_12_self_attn_q_norm_weight_to_fp16)[name = string("op_2423_cast_fp16")]; fp16 var_2368_promoted_2_to_fp16 = const()[name = string("op_2368_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_321_cast_fp16 = transpose(perm = x_321_perm_0, x = var_2407_cast_fp16)[name = string("transpose_62")]; tensor var_2427_cast_fp16 = pow(x = x_321_cast_fp16, y = var_2368_promoted_2_to_fp16)[name = string("op_2427_cast_fp16")]; tensor var_2429_axes_0 = const()[name = string("op_2429_axes_0"), val = tensor([-1])]; bool var_2429_keep_dims_0 = const()[name = string("op_2429_keep_dims_0"), val = bool(true)]; tensor var_2429_cast_fp16 = reduce_mean(axes = var_2429_axes_0, keep_dims = var_2429_keep_dims_0, x = var_2427_cast_fp16)[name = string("op_2429_cast_fp16")]; fp16 var_2430_to_fp16 = const()[name = string("op_2430_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2431_cast_fp16 = add(x = var_2429_cast_fp16, y = var_2430_to_fp16)[name = string("op_2431_cast_fp16")]; fp32 norm_101_epsilon_0 = const()[name = string("norm_101_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_101_cast_fp16 = rsqrt(epsilon = norm_101_epsilon_0, x = var_2431_cast_fp16)[name = string("norm_101_cast_fp16")]; tensor var_2433_cast_fp16 = mul(x = x_321_cast_fp16, y = norm_101_cast_fp16)[name = string("op_2433_cast_fp16")]; tensor layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067200)))]; tensor var_2434_cast_fp16 = mul(x = var_2433_cast_fp16, y = layers_12_self_attn_k_norm_weight_to_fp16)[name = string("op_2434_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_2423_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_2423_cast_fp16)[name = string("x2_49_cast_fp16")]; tensor var_2452_cast_fp16 = mul(x = x1_49_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2452_cast_fp16")]; tensor var_2453_cast_fp16 = mul(x = x2_49_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2453_cast_fp16")]; tensor var_2454_cast_fp16 = sub(x = var_2452_cast_fp16, y = var_2453_cast_fp16)[name = string("op_2454_cast_fp16")]; tensor var_2455_cast_fp16 = mul(x = x2_49_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2455_cast_fp16")]; tensor var_2456_cast_fp16 = mul(x = x1_49_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2456_cast_fp16")]; tensor var_2457_cast_fp16 = add(x = var_2455_cast_fp16, y = var_2456_cast_fp16)[name = string("op_2457_cast_fp16")]; bool q_25_interleave_0 = const()[name = string("q_25_interleave_0"), val = bool(false)]; tensor q_25_cast_fp16 = concat(axis = var_2369, interleave = q_25_interleave_0, values = (var_2454_cast_fp16, var_2457_cast_fp16))[name = string("q_25_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_2434_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_2434_cast_fp16)[name = string("x2_51_cast_fp16")]; tensor var_2476_cast_fp16 = mul(x = x1_51_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2476_cast_fp16")]; tensor var_2477_cast_fp16 = mul(x = x2_51_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2477_cast_fp16")]; tensor var_2478_cast_fp16 = sub(x = var_2476_cast_fp16, y = var_2477_cast_fp16)[name = string("op_2478_cast_fp16")]; tensor var_2479_cast_fp16 = mul(x = x2_51_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2479_cast_fp16")]; tensor var_2480_cast_fp16 = mul(x = x1_51_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2480_cast_fp16")]; tensor var_2481_cast_fp16 = add(x = var_2479_cast_fp16, y = var_2480_cast_fp16)[name = string("op_2481_cast_fp16")]; bool k_25_interleave_0 = const()[name = string("k_25_interleave_0"), val = bool(false)]; tensor k_25_cast_fp16 = concat(axis = var_2369, interleave = k_25_interleave_0, values = (var_2478_cast_fp16, var_2481_cast_fp16))[name = string("k_25_cast_fp16")]; tensor read_state_24 = read_state(input = k_cache_12)[name = string("read_state_24")]; tensor k_cache_75_cast_fp16 = mul(x = read_state_24, y = var_264_cast_fp16)[name = string("k_cache_75_cast_fp16")]; write_state(data = k_cache_75_cast_fp16, input = k_cache_12)[name = string("coreml_update_state_160_write_state")]; tensor coreml_update_state_160 = read_state(input = k_cache_12)[name = string("coreml_update_state_160")]; tensor var_2486_cast_fp16 = mul(x = k_25_cast_fp16, y = onehot_cast_fp16)[name = string("op_2486_cast_fp16")]; tensor k_cache_77_cast_fp16 = add(x = coreml_update_state_160, y = var_2486_cast_fp16)[name = string("k_cache_77_cast_fp16")]; write_state(data = k_cache_77_cast_fp16, input = k_cache_12)[name = string("coreml_update_state_161_write_state")]; tensor coreml_update_state_161 = read_state(input = k_cache_12)[name = string("coreml_update_state_161")]; tensor read_state_25 = read_state(input = v_cache_12)[name = string("read_state_25")]; tensor v_cache_75_cast_fp16 = mul(x = read_state_25, y = var_264_cast_fp16)[name = string("v_cache_75_cast_fp16")]; write_state(data = v_cache_75_cast_fp16, input = v_cache_12)[name = string("coreml_update_state_162_write_state")]; tensor coreml_update_state_162 = read_state(input = v_cache_12)[name = string("coreml_update_state_162")]; tensor v_25_cast_fp16 = transpose(perm = v_25_perm_0, x = var_2412_cast_fp16)[name = string("transpose_61")]; tensor var_2490_cast_fp16 = mul(x = v_25_cast_fp16, y = onehot_cast_fp16)[name = string("op_2490_cast_fp16")]; tensor v_cache_77_cast_fp16 = add(x = coreml_update_state_162, y = var_2490_cast_fp16)[name = string("v_cache_77_cast_fp16")]; write_state(data = v_cache_77_cast_fp16, input = v_cache_12)[name = string("coreml_update_state_163_write_state")]; tensor coreml_update_state_163 = read_state(input = v_cache_12)[name = string("coreml_update_state_163")]; tensor var_2492_axes_0 = const()[name = string("op_2492_axes_0"), val = tensor([2])]; tensor var_2492_cast_fp16 = expand_dims(axes = var_2492_axes_0, x = coreml_update_state_161)[name = string("op_2492_cast_fp16")]; tensor k_exp_49_reps_0 = const()[name = string("k_exp_49_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_49_cast_fp16 = tile(reps = k_exp_49_reps_0, x = var_2492_cast_fp16)[name = string("k_exp_49_cast_fp16")]; tensor var_2495 = const()[name = string("op_2495"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_51_cast_fp16 = reshape(shape = var_2495, x = k_exp_49_cast_fp16)[name = string("k_exp_51_cast_fp16")]; tensor var_2497_axes_0 = const()[name = string("op_2497_axes_0"), val = tensor([2])]; tensor var_2497_cast_fp16 = expand_dims(axes = var_2497_axes_0, x = coreml_update_state_163)[name = string("op_2497_cast_fp16")]; tensor v_exp_49_reps_0 = const()[name = string("v_exp_49_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_49_cast_fp16 = tile(reps = v_exp_49_reps_0, x = var_2497_cast_fp16)[name = string("v_exp_49_cast_fp16")]; tensor var_2500 = const()[name = string("op_2500"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_51_cast_fp16 = reshape(shape = var_2500, x = v_exp_49_cast_fp16)[name = string("v_exp_51_cast_fp16")]; bool var_2503_transpose_x_1 = const()[name = string("op_2503_transpose_x_1"), val = bool(false)]; bool var_2503_transpose_y_1 = const()[name = string("op_2503_transpose_y_1"), val = bool(true)]; tensor var_2503_cast_fp16 = matmul(transpose_x = var_2503_transpose_x_1, transpose_y = var_2503_transpose_y_1, x = q_25_cast_fp16, y = k_exp_51_cast_fp16)[name = string("op_2503_cast_fp16")]; fp16 var_2504_to_fp16 = const()[name = string("op_2504_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_49_cast_fp16 = mul(x = var_2503_cast_fp16, y = var_2504_to_fp16)[name = string("attn_49_cast_fp16")]; tensor input_121_cast_fp16 = add(x = attn_49_cast_fp16, y = attention_mask_to_fp16)[name = string("input_121_cast_fp16")]; tensor attn_51_cast_fp16 = softmax(axis = var_2369, x = input_121_cast_fp16)[name = string("attn_51_cast_fp16")]; bool out_25_transpose_x_0 = const()[name = string("out_25_transpose_x_0"), val = bool(false)]; bool out_25_transpose_y_0 = const()[name = string("out_25_transpose_y_0"), val = bool(false)]; tensor out_25_cast_fp16 = matmul(transpose_x = out_25_transpose_x_0, transpose_y = out_25_transpose_y_0, x = attn_51_cast_fp16, y = v_exp_51_cast_fp16)[name = string("out_25_cast_fp16")]; tensor var_2509_perm_0 = const()[name = string("op_2509_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2510 = const()[name = string("op_2510"), val = tensor([1, 1, -1])]; tensor var_2509_cast_fp16 = transpose(perm = var_2509_perm_0, x = out_25_cast_fp16)[name = string("transpose_60")]; tensor input_123_cast_fp16 = reshape(shape = var_2510, x = var_2509_cast_fp16)[name = string("input_123_cast_fp16")]; tensor layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195164736))))[name = string("layers_12_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_311_cast_fp16, y = linear_87_cast_fp16)[name = string("x_331_cast_fp16")]; fp16 var_2368_promoted_3_to_fp16 = const()[name = string("op_2368_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2517_cast_fp16 = pow(x = x_331_cast_fp16, y = var_2368_promoted_3_to_fp16)[name = string("op_2517_cast_fp16")]; tensor var_2519_axes_0 = const()[name = string("op_2519_axes_0"), val = tensor([-1])]; bool var_2519_keep_dims_0 = const()[name = string("op_2519_keep_dims_0"), val = bool(true)]; tensor var_2519_cast_fp16 = reduce_mean(axes = var_2519_axes_0, keep_dims = var_2519_keep_dims_0, x = var_2517_cast_fp16)[name = string("op_2519_cast_fp16")]; fp16 var_2520_to_fp16 = const()[name = string("op_2520_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2521_cast_fp16 = add(x = var_2519_cast_fp16, y = var_2520_to_fp16)[name = string("op_2521_cast_fp16")]; fp32 norm_103_epsilon_0 = const()[name = string("norm_103_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_103_cast_fp16 = rsqrt(epsilon = norm_103_epsilon_0, x = var_2521_cast_fp16)[name = string("norm_103_cast_fp16")]; tensor var_2523_cast_fp16 = mul(x = x_331_cast_fp16, y = norm_103_cast_fp16)[name = string("op_2523_cast_fp16")]; tensor layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195165312)))]; tensor var_2524_cast_fp16 = mul(x = var_2523_cast_fp16, y = layers_12_post_attention_layernorm_weight_to_fp16)[name = string("op_2524_cast_fp16")]; tensor layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195167424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313216))))[name = string("layers_12_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = var_2524_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2534_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2534_cast_fp16")]; tensor layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201459584))))[name = string("layers_12_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_up_proj_weight_to_fp16_palettized, x = var_2524_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_129_cast_fp16 = mul(x = var_2534_cast_fp16, y = linear_89_cast_fp16)[name = string("input_129_cast_fp16")]; tensor layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201460160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204605952))))[name = string("layers_12_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_90_cast_fp16)[name = string("x_337_cast_fp16")]; int32 var_2554 = const()[name = string("op_2554"), val = int32(-1)]; fp16 var_2553_promoted_to_fp16 = const()[name = string("op_2553_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2563_cast_fp16 = pow(x = x_337_cast_fp16, y = var_2553_promoted_to_fp16)[name = string("op_2563_cast_fp16")]; tensor var_2565_axes_0 = const()[name = string("op_2565_axes_0"), val = tensor([-1])]; bool var_2565_keep_dims_0 = const()[name = string("op_2565_keep_dims_0"), val = bool(true)]; tensor var_2565_cast_fp16 = reduce_mean(axes = var_2565_axes_0, keep_dims = var_2565_keep_dims_0, x = var_2563_cast_fp16)[name = string("op_2565_cast_fp16")]; fp16 var_2566_to_fp16 = const()[name = string("op_2566_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2567_cast_fp16 = add(x = var_2565_cast_fp16, y = var_2566_to_fp16)[name = string("op_2567_cast_fp16")]; fp32 norm_105_epsilon_0 = const()[name = string("norm_105_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_105_cast_fp16 = rsqrt(epsilon = norm_105_epsilon_0, x = var_2567_cast_fp16)[name = string("norm_105_cast_fp16")]; tensor var_2569_cast_fp16 = mul(x = x_337_cast_fp16, y = norm_105_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204606528)))]; tensor var_2570_cast_fp16 = mul(x = var_2569_cast_fp16, y = layers_13_input_layernorm_weight_to_fp16)[name = string("op_2570_cast_fp16")]; tensor layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204608640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206705856))))[name = string("layers_13_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = var_2570_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor var_2586 = const()[name = string("op_2586"), val = tensor([1, 1, 16, 128])]; tensor var_2587_cast_fp16 = reshape(shape = var_2586, x = linear_91_cast_fp16)[name = string("op_2587_cast_fp16")]; tensor x_343_perm_0 = const()[name = string("x_343_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206706432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755072))))[name = string("layers_13_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = var_2570_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor var_2591 = const()[name = string("op_2591"), val = tensor([1, 1, 8, 128])]; tensor var_2592_cast_fp16 = reshape(shape = var_2591, x = linear_92_cast_fp16)[name = string("op_2592_cast_fp16")]; tensor x_347_perm_0 = const()[name = string("x_347_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804288))))[name = string("layers_13_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = var_2570_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor var_2596 = const()[name = string("op_2596"), val = tensor([1, 1, 8, 128])]; tensor var_2597_cast_fp16 = reshape(shape = var_2596, x = linear_93_cast_fp16)[name = string("op_2597_cast_fp16")]; tensor v_27_perm_0 = const()[name = string("v_27_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_2553_promoted_1_to_fp16 = const()[name = string("op_2553_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_343_cast_fp16 = transpose(perm = x_343_perm_0, x = var_2587_cast_fp16)[name = string("transpose_59")]; tensor var_2601_cast_fp16 = pow(x = x_343_cast_fp16, y = var_2553_promoted_1_to_fp16)[name = string("op_2601_cast_fp16")]; tensor var_2603_axes_0 = const()[name = string("op_2603_axes_0"), val = tensor([-1])]; bool var_2603_keep_dims_0 = const()[name = string("op_2603_keep_dims_0"), val = bool(true)]; tensor var_2603_cast_fp16 = reduce_mean(axes = var_2603_axes_0, keep_dims = var_2603_keep_dims_0, x = var_2601_cast_fp16)[name = string("op_2603_cast_fp16")]; fp16 var_2604_to_fp16 = const()[name = string("op_2604_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2605_cast_fp16 = add(x = var_2603_cast_fp16, y = var_2604_to_fp16)[name = string("op_2605_cast_fp16")]; fp32 norm_107_epsilon_0 = const()[name = string("norm_107_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_107_cast_fp16 = rsqrt(epsilon = norm_107_epsilon_0, x = var_2605_cast_fp16)[name = string("norm_107_cast_fp16")]; tensor var_2607_cast_fp16 = mul(x = x_343_cast_fp16, y = norm_107_cast_fp16)[name = string("op_2607_cast_fp16")]; tensor layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804864)))]; tensor var_2608_cast_fp16 = mul(x = var_2607_cast_fp16, y = layers_13_self_attn_q_norm_weight_to_fp16)[name = string("op_2608_cast_fp16")]; fp16 var_2553_promoted_2_to_fp16 = const()[name = string("op_2553_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_347_cast_fp16 = transpose(perm = x_347_perm_0, x = var_2592_cast_fp16)[name = string("transpose_58")]; tensor var_2612_cast_fp16 = pow(x = x_347_cast_fp16, y = var_2553_promoted_2_to_fp16)[name = string("op_2612_cast_fp16")]; tensor var_2614_axes_0 = const()[name = string("op_2614_axes_0"), val = tensor([-1])]; bool var_2614_keep_dims_0 = const()[name = string("op_2614_keep_dims_0"), val = bool(true)]; tensor var_2614_cast_fp16 = reduce_mean(axes = var_2614_axes_0, keep_dims = var_2614_keep_dims_0, x = var_2612_cast_fp16)[name = string("op_2614_cast_fp16")]; fp16 var_2615_to_fp16 = const()[name = string("op_2615_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2616_cast_fp16 = add(x = var_2614_cast_fp16, y = var_2615_to_fp16)[name = string("op_2616_cast_fp16")]; fp32 norm_109_epsilon_0 = const()[name = string("norm_109_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_109_cast_fp16 = rsqrt(epsilon = norm_109_epsilon_0, x = var_2616_cast_fp16)[name = string("norm_109_cast_fp16")]; tensor var_2618_cast_fp16 = mul(x = x_347_cast_fp16, y = norm_109_cast_fp16)[name = string("op_2618_cast_fp16")]; tensor layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805184)))]; tensor var_2619_cast_fp16 = mul(x = var_2618_cast_fp16, y = layers_13_self_attn_k_norm_weight_to_fp16)[name = string("op_2619_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_2608_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_2608_cast_fp16)[name = string("x2_53_cast_fp16")]; tensor var_2637_cast_fp16 = mul(x = x1_53_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2637_cast_fp16")]; tensor var_2638_cast_fp16 = mul(x = x2_53_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2638_cast_fp16")]; tensor var_2639_cast_fp16 = sub(x = var_2637_cast_fp16, y = var_2638_cast_fp16)[name = string("op_2639_cast_fp16")]; tensor var_2640_cast_fp16 = mul(x = x2_53_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2640_cast_fp16")]; tensor var_2641_cast_fp16 = mul(x = x1_53_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2641_cast_fp16")]; tensor var_2642_cast_fp16 = add(x = var_2640_cast_fp16, y = var_2641_cast_fp16)[name = string("op_2642_cast_fp16")]; bool q_27_interleave_0 = const()[name = string("q_27_interleave_0"), val = bool(false)]; tensor q_27_cast_fp16 = concat(axis = var_2554, interleave = q_27_interleave_0, values = (var_2639_cast_fp16, var_2642_cast_fp16))[name = string("q_27_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = var_2619_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = var_2619_cast_fp16)[name = string("x2_55_cast_fp16")]; tensor var_2661_cast_fp16 = mul(x = x1_55_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2661_cast_fp16")]; tensor var_2662_cast_fp16 = mul(x = x2_55_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2662_cast_fp16")]; tensor var_2663_cast_fp16 = sub(x = var_2661_cast_fp16, y = var_2662_cast_fp16)[name = string("op_2663_cast_fp16")]; tensor var_2664_cast_fp16 = mul(x = x2_55_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2664_cast_fp16")]; tensor var_2665_cast_fp16 = mul(x = x1_55_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2665_cast_fp16")]; tensor var_2666_cast_fp16 = add(x = var_2664_cast_fp16, y = var_2665_cast_fp16)[name = string("op_2666_cast_fp16")]; bool k_27_interleave_0 = const()[name = string("k_27_interleave_0"), val = bool(false)]; tensor k_27_cast_fp16 = concat(axis = var_2554, interleave = k_27_interleave_0, values = (var_2663_cast_fp16, var_2666_cast_fp16))[name = string("k_27_cast_fp16")]; tensor read_state_26 = read_state(input = k_cache_13)[name = string("read_state_26")]; tensor k_cache_81_cast_fp16 = mul(x = read_state_26, y = var_264_cast_fp16)[name = string("k_cache_81_cast_fp16")]; write_state(data = k_cache_81_cast_fp16, input = k_cache_13)[name = string("coreml_update_state_164_write_state")]; tensor coreml_update_state_164 = read_state(input = k_cache_13)[name = string("coreml_update_state_164")]; tensor var_2671_cast_fp16 = mul(x = k_27_cast_fp16, y = onehot_cast_fp16)[name = string("op_2671_cast_fp16")]; tensor k_cache_83_cast_fp16 = add(x = coreml_update_state_164, y = var_2671_cast_fp16)[name = string("k_cache_83_cast_fp16")]; write_state(data = k_cache_83_cast_fp16, input = k_cache_13)[name = string("coreml_update_state_165_write_state")]; tensor coreml_update_state_165 = read_state(input = k_cache_13)[name = string("coreml_update_state_165")]; tensor read_state_27 = read_state(input = v_cache_13)[name = string("read_state_27")]; tensor v_cache_81_cast_fp16 = mul(x = read_state_27, y = var_264_cast_fp16)[name = string("v_cache_81_cast_fp16")]; write_state(data = v_cache_81_cast_fp16, input = v_cache_13)[name = string("coreml_update_state_166_write_state")]; tensor coreml_update_state_166 = read_state(input = v_cache_13)[name = string("coreml_update_state_166")]; tensor v_27_cast_fp16 = transpose(perm = v_27_perm_0, x = var_2597_cast_fp16)[name = string("transpose_57")]; tensor var_2675_cast_fp16 = mul(x = v_27_cast_fp16, y = onehot_cast_fp16)[name = string("op_2675_cast_fp16")]; tensor v_cache_83_cast_fp16 = add(x = coreml_update_state_166, y = var_2675_cast_fp16)[name = string("v_cache_83_cast_fp16")]; write_state(data = v_cache_83_cast_fp16, input = v_cache_13)[name = string("coreml_update_state_167_write_state")]; tensor coreml_update_state_167 = read_state(input = v_cache_13)[name = string("coreml_update_state_167")]; tensor var_2677_axes_0 = const()[name = string("op_2677_axes_0"), val = tensor([2])]; tensor var_2677_cast_fp16 = expand_dims(axes = var_2677_axes_0, x = coreml_update_state_165)[name = string("op_2677_cast_fp16")]; tensor k_exp_53_reps_0 = const()[name = string("k_exp_53_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_53_cast_fp16 = tile(reps = k_exp_53_reps_0, x = var_2677_cast_fp16)[name = string("k_exp_53_cast_fp16")]; tensor var_2680 = const()[name = string("op_2680"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_55_cast_fp16 = reshape(shape = var_2680, x = k_exp_53_cast_fp16)[name = string("k_exp_55_cast_fp16")]; tensor var_2682_axes_0 = const()[name = string("op_2682_axes_0"), val = tensor([2])]; tensor var_2682_cast_fp16 = expand_dims(axes = var_2682_axes_0, x = coreml_update_state_167)[name = string("op_2682_cast_fp16")]; tensor v_exp_53_reps_0 = const()[name = string("v_exp_53_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_53_cast_fp16 = tile(reps = v_exp_53_reps_0, x = var_2682_cast_fp16)[name = string("v_exp_53_cast_fp16")]; tensor var_2685 = const()[name = string("op_2685"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_55_cast_fp16 = reshape(shape = var_2685, x = v_exp_53_cast_fp16)[name = string("v_exp_55_cast_fp16")]; bool var_2688_transpose_x_1 = const()[name = string("op_2688_transpose_x_1"), val = bool(false)]; bool var_2688_transpose_y_1 = const()[name = string("op_2688_transpose_y_1"), val = bool(true)]; tensor var_2688_cast_fp16 = matmul(transpose_x = var_2688_transpose_x_1, transpose_y = var_2688_transpose_y_1, x = q_27_cast_fp16, y = k_exp_55_cast_fp16)[name = string("op_2688_cast_fp16")]; fp16 var_2689_to_fp16 = const()[name = string("op_2689_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_53_cast_fp16 = mul(x = var_2688_cast_fp16, y = var_2689_to_fp16)[name = string("attn_53_cast_fp16")]; tensor input_131_cast_fp16 = add(x = attn_53_cast_fp16, y = attention_mask_to_fp16)[name = string("input_131_cast_fp16")]; tensor attn_55_cast_fp16 = softmax(axis = var_2554, x = input_131_cast_fp16)[name = string("attn_55_cast_fp16")]; bool out_27_transpose_x_0 = const()[name = string("out_27_transpose_x_0"), val = bool(false)]; bool out_27_transpose_y_0 = const()[name = string("out_27_transpose_y_0"), val = bool(false)]; tensor out_27_cast_fp16 = matmul(transpose_x = out_27_transpose_x_0, transpose_y = out_27_transpose_y_0, x = attn_55_cast_fp16, y = v_exp_55_cast_fp16)[name = string("out_27_cast_fp16")]; tensor var_2694_perm_0 = const()[name = string("op_2694_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2695 = const()[name = string("op_2695"), val = tensor([1, 1, -1])]; tensor var_2694_cast_fp16 = transpose(perm = var_2694_perm_0, x = out_27_cast_fp16)[name = string("transpose_56")]; tensor input_133_cast_fp16 = reshape(shape = var_2695, x = var_2694_cast_fp16)[name = string("input_133_cast_fp16")]; tensor layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210902720))))[name = string("layers_13_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor x_357_cast_fp16 = add(x = x_337_cast_fp16, y = linear_94_cast_fp16)[name = string("x_357_cast_fp16")]; fp16 var_2553_promoted_3_to_fp16 = const()[name = string("op_2553_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2702_cast_fp16 = pow(x = x_357_cast_fp16, y = var_2553_promoted_3_to_fp16)[name = string("op_2702_cast_fp16")]; tensor var_2704_axes_0 = const()[name = string("op_2704_axes_0"), val = tensor([-1])]; bool var_2704_keep_dims_0 = const()[name = string("op_2704_keep_dims_0"), val = bool(true)]; tensor var_2704_cast_fp16 = reduce_mean(axes = var_2704_axes_0, keep_dims = var_2704_keep_dims_0, x = var_2702_cast_fp16)[name = string("op_2704_cast_fp16")]; fp16 var_2705_to_fp16 = const()[name = string("op_2705_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2706_cast_fp16 = add(x = var_2704_cast_fp16, y = var_2705_to_fp16)[name = string("op_2706_cast_fp16")]; fp32 norm_111_epsilon_0 = const()[name = string("norm_111_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_111_cast_fp16 = rsqrt(epsilon = norm_111_epsilon_0, x = var_2706_cast_fp16)[name = string("norm_111_cast_fp16")]; tensor var_2708_cast_fp16 = mul(x = x_357_cast_fp16, y = norm_111_cast_fp16)[name = string("op_2708_cast_fp16")]; tensor layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210903296)))]; tensor var_2709_cast_fp16 = mul(x = var_2708_cast_fp16, y = layers_13_post_attention_layernorm_weight_to_fp16)[name = string("op_2709_cast_fp16")]; tensor layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210905408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051200))))[name = string("layers_13_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = var_2709_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_2719_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2719_cast_fp16")]; tensor layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217197568))))[name = string("layers_13_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_up_proj_weight_to_fp16_palettized, x = var_2709_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_139_cast_fp16 = mul(x = var_2719_cast_fp16, y = linear_96_cast_fp16)[name = string("input_139_cast_fp16")]; tensor layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217198144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220343936))))[name = string("layers_13_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_97_cast_fp16)[name = string("x_363_cast_fp16")]; int32 var_2739 = const()[name = string("op_2739"), val = int32(-1)]; fp16 var_2738_promoted_to_fp16 = const()[name = string("op_2738_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2748_cast_fp16 = pow(x = x_363_cast_fp16, y = var_2738_promoted_to_fp16)[name = string("op_2748_cast_fp16")]; tensor var_2750_axes_0 = const()[name = string("op_2750_axes_0"), val = tensor([-1])]; bool var_2750_keep_dims_0 = const()[name = string("op_2750_keep_dims_0"), val = bool(true)]; tensor var_2750_cast_fp16 = reduce_mean(axes = var_2750_axes_0, keep_dims = var_2750_keep_dims_0, x = var_2748_cast_fp16)[name = string("op_2750_cast_fp16")]; fp16 var_2751_to_fp16 = const()[name = string("op_2751_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2752_cast_fp16 = add(x = var_2750_cast_fp16, y = var_2751_to_fp16)[name = string("op_2752_cast_fp16")]; fp32 norm_113_epsilon_0 = const()[name = string("norm_113_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_113_cast_fp16 = rsqrt(epsilon = norm_113_epsilon_0, x = var_2752_cast_fp16)[name = string("norm_113_cast_fp16")]; tensor var_2754_cast_fp16 = mul(x = x_363_cast_fp16, y = norm_113_cast_fp16)[name = string("op_2754_cast_fp16")]; tensor layers_14_input_layernorm_weight_to_fp16 = const()[name = string("layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220344512)))]; tensor var_2755_cast_fp16 = mul(x = var_2754_cast_fp16, y = layers_14_input_layernorm_weight_to_fp16)[name = string("op_2755_cast_fp16")]; tensor layers_14_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220346624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222443840))))[name = string("layers_14_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_14_self_attn_q_proj_weight_to_fp16_palettized, x = var_2755_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor var_2771 = const()[name = string("op_2771"), val = tensor([1, 1, 16, 128])]; tensor var_2772_cast_fp16 = reshape(shape = var_2771, x = linear_98_cast_fp16)[name = string("op_2772_cast_fp16")]; tensor x_369_perm_0 = const()[name = string("x_369_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_14_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222444416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223493056))))[name = string("layers_14_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_k_proj_weight_to_fp16_palettized, x = var_2755_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor var_2776 = const()[name = string("op_2776"), val = tensor([1, 1, 8, 128])]; tensor var_2777_cast_fp16 = reshape(shape = var_2776, x = linear_99_cast_fp16)[name = string("op_2777_cast_fp16")]; tensor x_373_perm_0 = const()[name = string("x_373_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_14_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223493632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224542272))))[name = string("layers_14_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_v_proj_weight_to_fp16_palettized, x = var_2755_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor var_2781 = const()[name = string("op_2781"), val = tensor([1, 1, 8, 128])]; tensor var_2782_cast_fp16 = reshape(shape = var_2781, x = linear_100_cast_fp16)[name = string("op_2782_cast_fp16")]; tensor v_29_perm_0 = const()[name = string("v_29_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_2738_promoted_1_to_fp16 = const()[name = string("op_2738_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_369_cast_fp16 = transpose(perm = x_369_perm_0, x = var_2772_cast_fp16)[name = string("transpose_55")]; tensor var_2786_cast_fp16 = pow(x = x_369_cast_fp16, y = var_2738_promoted_1_to_fp16)[name = string("op_2786_cast_fp16")]; tensor var_2788_axes_0 = const()[name = string("op_2788_axes_0"), val = tensor([-1])]; bool var_2788_keep_dims_0 = const()[name = string("op_2788_keep_dims_0"), val = bool(true)]; tensor var_2788_cast_fp16 = reduce_mean(axes = var_2788_axes_0, keep_dims = var_2788_keep_dims_0, x = var_2786_cast_fp16)[name = string("op_2788_cast_fp16")]; fp16 var_2789_to_fp16 = const()[name = string("op_2789_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2790_cast_fp16 = add(x = var_2788_cast_fp16, y = var_2789_to_fp16)[name = string("op_2790_cast_fp16")]; fp32 norm_115_epsilon_0 = const()[name = string("norm_115_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_115_cast_fp16 = rsqrt(epsilon = norm_115_epsilon_0, x = var_2790_cast_fp16)[name = string("norm_115_cast_fp16")]; tensor var_2792_cast_fp16 = mul(x = x_369_cast_fp16, y = norm_115_cast_fp16)[name = string("op_2792_cast_fp16")]; tensor layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224542848)))]; tensor var_2793_cast_fp16 = mul(x = var_2792_cast_fp16, y = layers_14_self_attn_q_norm_weight_to_fp16)[name = string("op_2793_cast_fp16")]; fp16 var_2738_promoted_2_to_fp16 = const()[name = string("op_2738_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_373_cast_fp16 = transpose(perm = x_373_perm_0, x = var_2777_cast_fp16)[name = string("transpose_54")]; tensor var_2797_cast_fp16 = pow(x = x_373_cast_fp16, y = var_2738_promoted_2_to_fp16)[name = string("op_2797_cast_fp16")]; tensor var_2799_axes_0 = const()[name = string("op_2799_axes_0"), val = tensor([-1])]; bool var_2799_keep_dims_0 = const()[name = string("op_2799_keep_dims_0"), val = bool(true)]; tensor var_2799_cast_fp16 = reduce_mean(axes = var_2799_axes_0, keep_dims = var_2799_keep_dims_0, x = var_2797_cast_fp16)[name = string("op_2799_cast_fp16")]; fp16 var_2800_to_fp16 = const()[name = string("op_2800_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2801_cast_fp16 = add(x = var_2799_cast_fp16, y = var_2800_to_fp16)[name = string("op_2801_cast_fp16")]; fp32 norm_117_epsilon_0 = const()[name = string("norm_117_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_117_cast_fp16 = rsqrt(epsilon = norm_117_epsilon_0, x = var_2801_cast_fp16)[name = string("norm_117_cast_fp16")]; tensor var_2803_cast_fp16 = mul(x = x_373_cast_fp16, y = norm_117_cast_fp16)[name = string("op_2803_cast_fp16")]; tensor layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224543168)))]; tensor var_2804_cast_fp16 = mul(x = var_2803_cast_fp16, y = layers_14_self_attn_k_norm_weight_to_fp16)[name = string("op_2804_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = var_2793_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = var_2793_cast_fp16)[name = string("x2_57_cast_fp16")]; tensor var_2822_cast_fp16 = mul(x = x1_57_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2822_cast_fp16")]; tensor var_2823_cast_fp16 = mul(x = x2_57_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2823_cast_fp16")]; tensor var_2824_cast_fp16 = sub(x = var_2822_cast_fp16, y = var_2823_cast_fp16)[name = string("op_2824_cast_fp16")]; tensor var_2825_cast_fp16 = mul(x = x2_57_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2825_cast_fp16")]; tensor var_2826_cast_fp16 = mul(x = x1_57_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2826_cast_fp16")]; tensor var_2827_cast_fp16 = add(x = var_2825_cast_fp16, y = var_2826_cast_fp16)[name = string("op_2827_cast_fp16")]; bool q_29_interleave_0 = const()[name = string("q_29_interleave_0"), val = bool(false)]; tensor q_29_cast_fp16 = concat(axis = var_2739, interleave = q_29_interleave_0, values = (var_2824_cast_fp16, var_2827_cast_fp16))[name = string("q_29_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = var_2804_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = var_2804_cast_fp16)[name = string("x2_59_cast_fp16")]; tensor var_2846_cast_fp16 = mul(x = x1_59_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2846_cast_fp16")]; tensor var_2847_cast_fp16 = mul(x = x2_59_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2847_cast_fp16")]; tensor var_2848_cast_fp16 = sub(x = var_2846_cast_fp16, y = var_2847_cast_fp16)[name = string("op_2848_cast_fp16")]; tensor var_2849_cast_fp16 = mul(x = x2_59_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2849_cast_fp16")]; tensor var_2850_cast_fp16 = mul(x = x1_59_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = add(x = var_2849_cast_fp16, y = var_2850_cast_fp16)[name = string("op_2851_cast_fp16")]; bool k_29_interleave_0 = const()[name = string("k_29_interleave_0"), val = bool(false)]; tensor k_29_cast_fp16 = concat(axis = var_2739, interleave = k_29_interleave_0, values = (var_2848_cast_fp16, var_2851_cast_fp16))[name = string("k_29_cast_fp16")]; tensor read_state_28 = read_state(input = k_cache_14)[name = string("read_state_28")]; tensor k_cache_87_cast_fp16 = mul(x = read_state_28, y = var_264_cast_fp16)[name = string("k_cache_87_cast_fp16")]; write_state(data = k_cache_87_cast_fp16, input = k_cache_14)[name = string("coreml_update_state_168_write_state")]; tensor coreml_update_state_168 = read_state(input = k_cache_14)[name = string("coreml_update_state_168")]; tensor var_2856_cast_fp16 = mul(x = k_29_cast_fp16, y = onehot_cast_fp16)[name = string("op_2856_cast_fp16")]; tensor k_cache_89_cast_fp16 = add(x = coreml_update_state_168, y = var_2856_cast_fp16)[name = string("k_cache_89_cast_fp16")]; write_state(data = k_cache_89_cast_fp16, input = k_cache_14)[name = string("coreml_update_state_169_write_state")]; tensor coreml_update_state_169 = read_state(input = k_cache_14)[name = string("coreml_update_state_169")]; tensor read_state_29 = read_state(input = v_cache_14)[name = string("read_state_29")]; tensor v_cache_87_cast_fp16 = mul(x = read_state_29, y = var_264_cast_fp16)[name = string("v_cache_87_cast_fp16")]; write_state(data = v_cache_87_cast_fp16, input = v_cache_14)[name = string("coreml_update_state_170_write_state")]; tensor coreml_update_state_170 = read_state(input = v_cache_14)[name = string("coreml_update_state_170")]; tensor v_29_cast_fp16 = transpose(perm = v_29_perm_0, x = var_2782_cast_fp16)[name = string("transpose_53")]; tensor var_2860_cast_fp16 = mul(x = v_29_cast_fp16, y = onehot_cast_fp16)[name = string("op_2860_cast_fp16")]; tensor v_cache_89_cast_fp16 = add(x = coreml_update_state_170, y = var_2860_cast_fp16)[name = string("v_cache_89_cast_fp16")]; write_state(data = v_cache_89_cast_fp16, input = v_cache_14)[name = string("coreml_update_state_171_write_state")]; tensor coreml_update_state_171 = read_state(input = v_cache_14)[name = string("coreml_update_state_171")]; tensor var_2862_axes_0 = const()[name = string("op_2862_axes_0"), val = tensor([2])]; tensor var_2862_cast_fp16 = expand_dims(axes = var_2862_axes_0, x = coreml_update_state_169)[name = string("op_2862_cast_fp16")]; tensor k_exp_57_reps_0 = const()[name = string("k_exp_57_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_57_cast_fp16 = tile(reps = k_exp_57_reps_0, x = var_2862_cast_fp16)[name = string("k_exp_57_cast_fp16")]; tensor var_2865 = const()[name = string("op_2865"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_59_cast_fp16 = reshape(shape = var_2865, x = k_exp_57_cast_fp16)[name = string("k_exp_59_cast_fp16")]; tensor var_2867_axes_0 = const()[name = string("op_2867_axes_0"), val = tensor([2])]; tensor var_2867_cast_fp16 = expand_dims(axes = var_2867_axes_0, x = coreml_update_state_171)[name = string("op_2867_cast_fp16")]; tensor v_exp_57_reps_0 = const()[name = string("v_exp_57_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_57_cast_fp16 = tile(reps = v_exp_57_reps_0, x = var_2867_cast_fp16)[name = string("v_exp_57_cast_fp16")]; tensor var_2870 = const()[name = string("op_2870"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_59_cast_fp16 = reshape(shape = var_2870, x = v_exp_57_cast_fp16)[name = string("v_exp_59_cast_fp16")]; bool var_2873_transpose_x_1 = const()[name = string("op_2873_transpose_x_1"), val = bool(false)]; bool var_2873_transpose_y_1 = const()[name = string("op_2873_transpose_y_1"), val = bool(true)]; tensor var_2873_cast_fp16 = matmul(transpose_x = var_2873_transpose_x_1, transpose_y = var_2873_transpose_y_1, x = q_29_cast_fp16, y = k_exp_59_cast_fp16)[name = string("op_2873_cast_fp16")]; fp16 var_2874_to_fp16 = const()[name = string("op_2874_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_57_cast_fp16 = mul(x = var_2873_cast_fp16, y = var_2874_to_fp16)[name = string("attn_57_cast_fp16")]; tensor input_141_cast_fp16 = add(x = attn_57_cast_fp16, y = attention_mask_to_fp16)[name = string("input_141_cast_fp16")]; tensor attn_59_cast_fp16 = softmax(axis = var_2739, x = input_141_cast_fp16)[name = string("attn_59_cast_fp16")]; bool out_29_transpose_x_0 = const()[name = string("out_29_transpose_x_0"), val = bool(false)]; bool out_29_transpose_y_0 = const()[name = string("out_29_transpose_y_0"), val = bool(false)]; tensor out_29_cast_fp16 = matmul(transpose_x = out_29_transpose_x_0, transpose_y = out_29_transpose_y_0, x = attn_59_cast_fp16, y = v_exp_59_cast_fp16)[name = string("out_29_cast_fp16")]; tensor var_2879_perm_0 = const()[name = string("op_2879_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2880 = const()[name = string("op_2880"), val = tensor([1, 1, -1])]; tensor var_2879_cast_fp16 = transpose(perm = var_2879_perm_0, x = out_29_cast_fp16)[name = string("transpose_52")]; tensor input_143_cast_fp16 = reshape(shape = var_2880, x = var_2879_cast_fp16)[name = string("input_143_cast_fp16")]; tensor layers_14_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224543488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226640704))))[name = string("layers_14_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_101_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_o_proj_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor x_383_cast_fp16 = add(x = x_363_cast_fp16, y = linear_101_cast_fp16)[name = string("x_383_cast_fp16")]; fp16 var_2738_promoted_3_to_fp16 = const()[name = string("op_2738_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2887_cast_fp16 = pow(x = x_383_cast_fp16, y = var_2738_promoted_3_to_fp16)[name = string("op_2887_cast_fp16")]; tensor var_2889_axes_0 = const()[name = string("op_2889_axes_0"), val = tensor([-1])]; bool var_2889_keep_dims_0 = const()[name = string("op_2889_keep_dims_0"), val = bool(true)]; tensor var_2889_cast_fp16 = reduce_mean(axes = var_2889_axes_0, keep_dims = var_2889_keep_dims_0, x = var_2887_cast_fp16)[name = string("op_2889_cast_fp16")]; fp16 var_2890_to_fp16 = const()[name = string("op_2890_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2891_cast_fp16 = add(x = var_2889_cast_fp16, y = var_2890_to_fp16)[name = string("op_2891_cast_fp16")]; fp32 norm_119_epsilon_0 = const()[name = string("norm_119_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_119_cast_fp16 = rsqrt(epsilon = norm_119_epsilon_0, x = var_2891_cast_fp16)[name = string("norm_119_cast_fp16")]; tensor var_2893_cast_fp16 = mul(x = x_383_cast_fp16, y = norm_119_cast_fp16)[name = string("op_2893_cast_fp16")]; tensor layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226641280)))]; tensor var_2894_cast_fp16 = mul(x = var_2893_cast_fp16, y = layers_14_post_attention_layernorm_weight_to_fp16)[name = string("op_2894_cast_fp16")]; tensor layers_14_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226643392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229789184))))[name = string("layers_14_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_gate_proj_weight_to_fp16_palettized, x = var_2894_cast_fp16)[name = string("linear_102_cast_fp16")]; tensor var_2904_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2904_cast_fp16")]; tensor layers_14_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229789760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232935552))))[name = string("layers_14_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_up_proj_weight_to_fp16_palettized, x = var_2894_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor input_149_cast_fp16 = mul(x = var_2904_cast_fp16, y = linear_103_cast_fp16)[name = string("input_149_cast_fp16")]; tensor layers_14_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232936128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236081920))))[name = string("layers_14_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_104_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_mlp_down_proj_weight_to_fp16_palettized, x = input_149_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor x_389_cast_fp16 = add(x = x_383_cast_fp16, y = linear_104_cast_fp16)[name = string("x_389_cast_fp16")]; int32 var_2924 = const()[name = string("op_2924"), val = int32(-1)]; fp16 var_2923_promoted_to_fp16 = const()[name = string("op_2923_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2933_cast_fp16 = pow(x = x_389_cast_fp16, y = var_2923_promoted_to_fp16)[name = string("op_2933_cast_fp16")]; tensor var_2935_axes_0 = const()[name = string("op_2935_axes_0"), val = tensor([-1])]; bool var_2935_keep_dims_0 = const()[name = string("op_2935_keep_dims_0"), val = bool(true)]; tensor var_2935_cast_fp16 = reduce_mean(axes = var_2935_axes_0, keep_dims = var_2935_keep_dims_0, x = var_2933_cast_fp16)[name = string("op_2935_cast_fp16")]; fp16 var_2936_to_fp16 = const()[name = string("op_2936_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2937_cast_fp16 = add(x = var_2935_cast_fp16, y = var_2936_to_fp16)[name = string("op_2937_cast_fp16")]; fp32 norm_121_epsilon_0 = const()[name = string("norm_121_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_121_cast_fp16 = rsqrt(epsilon = norm_121_epsilon_0, x = var_2937_cast_fp16)[name = string("norm_121_cast_fp16")]; tensor var_2939_cast_fp16 = mul(x = x_389_cast_fp16, y = norm_121_cast_fp16)[name = string("op_2939_cast_fp16")]; tensor layers_15_input_layernorm_weight_to_fp16 = const()[name = string("layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236082496)))]; tensor var_2940_cast_fp16 = mul(x = var_2939_cast_fp16, y = layers_15_input_layernorm_weight_to_fp16)[name = string("op_2940_cast_fp16")]; tensor layers_15_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236084608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238181824))))[name = string("layers_15_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_15_self_attn_q_proj_weight_to_fp16_palettized, x = var_2940_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor var_2956 = const()[name = string("op_2956"), val = tensor([1, 1, 16, 128])]; tensor var_2957_cast_fp16 = reshape(shape = var_2956, x = linear_105_cast_fp16)[name = string("op_2957_cast_fp16")]; tensor x_395_perm_0 = const()[name = string("x_395_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_15_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238182400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239231040))))[name = string("layers_15_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_k_proj_weight_to_fp16_palettized, x = var_2940_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor var_2961 = const()[name = string("op_2961"), val = tensor([1, 1, 8, 128])]; tensor var_2962_cast_fp16 = reshape(shape = var_2961, x = linear_106_cast_fp16)[name = string("op_2962_cast_fp16")]; tensor x_399_perm_0 = const()[name = string("x_399_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_15_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239231616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240280256))))[name = string("layers_15_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_v_proj_weight_to_fp16_palettized, x = var_2940_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor var_2966 = const()[name = string("op_2966"), val = tensor([1, 1, 8, 128])]; tensor var_2967_cast_fp16 = reshape(shape = var_2966, x = linear_107_cast_fp16)[name = string("op_2967_cast_fp16")]; tensor v_31_perm_0 = const()[name = string("v_31_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_2923_promoted_1_to_fp16 = const()[name = string("op_2923_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_395_cast_fp16 = transpose(perm = x_395_perm_0, x = var_2957_cast_fp16)[name = string("transpose_51")]; tensor var_2971_cast_fp16 = pow(x = x_395_cast_fp16, y = var_2923_promoted_1_to_fp16)[name = string("op_2971_cast_fp16")]; tensor var_2973_axes_0 = const()[name = string("op_2973_axes_0"), val = tensor([-1])]; bool var_2973_keep_dims_0 = const()[name = string("op_2973_keep_dims_0"), val = bool(true)]; tensor var_2973_cast_fp16 = reduce_mean(axes = var_2973_axes_0, keep_dims = var_2973_keep_dims_0, x = var_2971_cast_fp16)[name = string("op_2973_cast_fp16")]; fp16 var_2974_to_fp16 = const()[name = string("op_2974_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2975_cast_fp16 = add(x = var_2973_cast_fp16, y = var_2974_to_fp16)[name = string("op_2975_cast_fp16")]; fp32 norm_123_epsilon_0 = const()[name = string("norm_123_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_123_cast_fp16 = rsqrt(epsilon = norm_123_epsilon_0, x = var_2975_cast_fp16)[name = string("norm_123_cast_fp16")]; tensor var_2977_cast_fp16 = mul(x = x_395_cast_fp16, y = norm_123_cast_fp16)[name = string("op_2977_cast_fp16")]; tensor layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240280832)))]; tensor var_2978_cast_fp16 = mul(x = var_2977_cast_fp16, y = layers_15_self_attn_q_norm_weight_to_fp16)[name = string("op_2978_cast_fp16")]; fp16 var_2923_promoted_2_to_fp16 = const()[name = string("op_2923_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_399_cast_fp16 = transpose(perm = x_399_perm_0, x = var_2962_cast_fp16)[name = string("transpose_50")]; tensor var_2982_cast_fp16 = pow(x = x_399_cast_fp16, y = var_2923_promoted_2_to_fp16)[name = string("op_2982_cast_fp16")]; tensor var_2984_axes_0 = const()[name = string("op_2984_axes_0"), val = tensor([-1])]; bool var_2984_keep_dims_0 = const()[name = string("op_2984_keep_dims_0"), val = bool(true)]; tensor var_2984_cast_fp16 = reduce_mean(axes = var_2984_axes_0, keep_dims = var_2984_keep_dims_0, x = var_2982_cast_fp16)[name = string("op_2984_cast_fp16")]; fp16 var_2985_to_fp16 = const()[name = string("op_2985_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2986_cast_fp16 = add(x = var_2984_cast_fp16, y = var_2985_to_fp16)[name = string("op_2986_cast_fp16")]; fp32 norm_125_epsilon_0 = const()[name = string("norm_125_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_125_cast_fp16 = rsqrt(epsilon = norm_125_epsilon_0, x = var_2986_cast_fp16)[name = string("norm_125_cast_fp16")]; tensor var_2988_cast_fp16 = mul(x = x_399_cast_fp16, y = norm_125_cast_fp16)[name = string("op_2988_cast_fp16")]; tensor layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240281152)))]; tensor var_2989_cast_fp16 = mul(x = var_2988_cast_fp16, y = layers_15_self_attn_k_norm_weight_to_fp16)[name = string("op_2989_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = var_2978_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = var_2978_cast_fp16)[name = string("x2_61_cast_fp16")]; tensor var_3007_cast_fp16 = mul(x = x1_61_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3007_cast_fp16")]; tensor var_3008_cast_fp16 = mul(x = x2_61_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3008_cast_fp16")]; tensor var_3009_cast_fp16 = sub(x = var_3007_cast_fp16, y = var_3008_cast_fp16)[name = string("op_3009_cast_fp16")]; tensor var_3010_cast_fp16 = mul(x = x2_61_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3010_cast_fp16")]; tensor var_3011_cast_fp16 = mul(x = x1_61_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3011_cast_fp16")]; tensor var_3012_cast_fp16 = add(x = var_3010_cast_fp16, y = var_3011_cast_fp16)[name = string("op_3012_cast_fp16")]; bool q_31_interleave_0 = const()[name = string("q_31_interleave_0"), val = bool(false)]; tensor q_31_cast_fp16 = concat(axis = var_2924, interleave = q_31_interleave_0, values = (var_3009_cast_fp16, var_3012_cast_fp16))[name = string("q_31_cast_fp16")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = var_2989_cast_fp16)[name = string("x1_63_cast_fp16")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = var_2989_cast_fp16)[name = string("x2_63_cast_fp16")]; tensor var_3031_cast_fp16 = mul(x = x1_63_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3031_cast_fp16")]; tensor var_3032_cast_fp16 = mul(x = x2_63_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3032_cast_fp16")]; tensor var_3033_cast_fp16 = sub(x = var_3031_cast_fp16, y = var_3032_cast_fp16)[name = string("op_3033_cast_fp16")]; tensor var_3034_cast_fp16 = mul(x = x2_63_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3034_cast_fp16")]; tensor var_3035_cast_fp16 = mul(x = x1_63_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3035_cast_fp16")]; tensor var_3036_cast_fp16 = add(x = var_3034_cast_fp16, y = var_3035_cast_fp16)[name = string("op_3036_cast_fp16")]; bool k_31_interleave_0 = const()[name = string("k_31_interleave_0"), val = bool(false)]; tensor k_31_cast_fp16 = concat(axis = var_2924, interleave = k_31_interleave_0, values = (var_3033_cast_fp16, var_3036_cast_fp16))[name = string("k_31_cast_fp16")]; tensor read_state_30 = read_state(input = k_cache_15)[name = string("read_state_30")]; tensor k_cache_93_cast_fp16 = mul(x = read_state_30, y = var_264_cast_fp16)[name = string("k_cache_93_cast_fp16")]; write_state(data = k_cache_93_cast_fp16, input = k_cache_15)[name = string("coreml_update_state_172_write_state")]; tensor coreml_update_state_172 = read_state(input = k_cache_15)[name = string("coreml_update_state_172")]; tensor var_3041_cast_fp16 = mul(x = k_31_cast_fp16, y = onehot_cast_fp16)[name = string("op_3041_cast_fp16")]; tensor k_cache_95_cast_fp16 = add(x = coreml_update_state_172, y = var_3041_cast_fp16)[name = string("k_cache_95_cast_fp16")]; write_state(data = k_cache_95_cast_fp16, input = k_cache_15)[name = string("coreml_update_state_173_write_state")]; tensor coreml_update_state_173 = read_state(input = k_cache_15)[name = string("coreml_update_state_173")]; tensor read_state_31 = read_state(input = v_cache_15)[name = string("read_state_31")]; tensor v_cache_93_cast_fp16 = mul(x = read_state_31, y = var_264_cast_fp16)[name = string("v_cache_93_cast_fp16")]; write_state(data = v_cache_93_cast_fp16, input = v_cache_15)[name = string("coreml_update_state_174_write_state")]; tensor coreml_update_state_174 = read_state(input = v_cache_15)[name = string("coreml_update_state_174")]; tensor v_31_cast_fp16 = transpose(perm = v_31_perm_0, x = var_2967_cast_fp16)[name = string("transpose_49")]; tensor var_3045_cast_fp16 = mul(x = v_31_cast_fp16, y = onehot_cast_fp16)[name = string("op_3045_cast_fp16")]; tensor v_cache_95_cast_fp16 = add(x = coreml_update_state_174, y = var_3045_cast_fp16)[name = string("v_cache_95_cast_fp16")]; write_state(data = v_cache_95_cast_fp16, input = v_cache_15)[name = string("coreml_update_state_175_write_state")]; tensor coreml_update_state_175 = read_state(input = v_cache_15)[name = string("coreml_update_state_175")]; tensor var_3047_axes_0 = const()[name = string("op_3047_axes_0"), val = tensor([2])]; tensor var_3047_cast_fp16 = expand_dims(axes = var_3047_axes_0, x = coreml_update_state_173)[name = string("op_3047_cast_fp16")]; tensor k_exp_61_reps_0 = const()[name = string("k_exp_61_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_61_cast_fp16 = tile(reps = k_exp_61_reps_0, x = var_3047_cast_fp16)[name = string("k_exp_61_cast_fp16")]; tensor var_3050 = const()[name = string("op_3050"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_63_cast_fp16 = reshape(shape = var_3050, x = k_exp_61_cast_fp16)[name = string("k_exp_63_cast_fp16")]; tensor var_3052_axes_0 = const()[name = string("op_3052_axes_0"), val = tensor([2])]; tensor var_3052_cast_fp16 = expand_dims(axes = var_3052_axes_0, x = coreml_update_state_175)[name = string("op_3052_cast_fp16")]; tensor v_exp_61_reps_0 = const()[name = string("v_exp_61_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_61_cast_fp16 = tile(reps = v_exp_61_reps_0, x = var_3052_cast_fp16)[name = string("v_exp_61_cast_fp16")]; tensor var_3055 = const()[name = string("op_3055"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_63_cast_fp16 = reshape(shape = var_3055, x = v_exp_61_cast_fp16)[name = string("v_exp_63_cast_fp16")]; bool var_3058_transpose_x_1 = const()[name = string("op_3058_transpose_x_1"), val = bool(false)]; bool var_3058_transpose_y_1 = const()[name = string("op_3058_transpose_y_1"), val = bool(true)]; tensor var_3058_cast_fp16 = matmul(transpose_x = var_3058_transpose_x_1, transpose_y = var_3058_transpose_y_1, x = q_31_cast_fp16, y = k_exp_63_cast_fp16)[name = string("op_3058_cast_fp16")]; fp16 var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_61_cast_fp16 = mul(x = var_3058_cast_fp16, y = var_3059_to_fp16)[name = string("attn_61_cast_fp16")]; tensor input_151_cast_fp16 = add(x = attn_61_cast_fp16, y = attention_mask_to_fp16)[name = string("input_151_cast_fp16")]; tensor attn_63_cast_fp16 = softmax(axis = var_2924, x = input_151_cast_fp16)[name = string("attn_63_cast_fp16")]; bool out_31_transpose_x_0 = const()[name = string("out_31_transpose_x_0"), val = bool(false)]; bool out_31_transpose_y_0 = const()[name = string("out_31_transpose_y_0"), val = bool(false)]; tensor out_31_cast_fp16 = matmul(transpose_x = out_31_transpose_x_0, transpose_y = out_31_transpose_y_0, x = attn_63_cast_fp16, y = v_exp_63_cast_fp16)[name = string("out_31_cast_fp16")]; tensor var_3064_perm_0 = const()[name = string("op_3064_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3065 = const()[name = string("op_3065"), val = tensor([1, 1, -1])]; tensor var_3064_cast_fp16 = transpose(perm = var_3064_perm_0, x = out_31_cast_fp16)[name = string("transpose_48")]; tensor input_153_cast_fp16 = reshape(shape = var_3065, x = var_3064_cast_fp16)[name = string("input_153_cast_fp16")]; tensor layers_15_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240281472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242378688))))[name = string("layers_15_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_108_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_o_proj_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor x_409_cast_fp16 = add(x = x_389_cast_fp16, y = linear_108_cast_fp16)[name = string("x_409_cast_fp16")]; fp16 var_2923_promoted_3_to_fp16 = const()[name = string("op_2923_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3072_cast_fp16 = pow(x = x_409_cast_fp16, y = var_2923_promoted_3_to_fp16)[name = string("op_3072_cast_fp16")]; tensor var_3074_axes_0 = const()[name = string("op_3074_axes_0"), val = tensor([-1])]; bool var_3074_keep_dims_0 = const()[name = string("op_3074_keep_dims_0"), val = bool(true)]; tensor var_3074_cast_fp16 = reduce_mean(axes = var_3074_axes_0, keep_dims = var_3074_keep_dims_0, x = var_3072_cast_fp16)[name = string("op_3074_cast_fp16")]; fp16 var_3075_to_fp16 = const()[name = string("op_3075_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3076_cast_fp16 = add(x = var_3074_cast_fp16, y = var_3075_to_fp16)[name = string("op_3076_cast_fp16")]; fp32 norm_127_epsilon_0 = const()[name = string("norm_127_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_127_cast_fp16 = rsqrt(epsilon = norm_127_epsilon_0, x = var_3076_cast_fp16)[name = string("norm_127_cast_fp16")]; tensor var_3078_cast_fp16 = mul(x = x_409_cast_fp16, y = norm_127_cast_fp16)[name = string("op_3078_cast_fp16")]; tensor layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242379264)))]; tensor var_3079_cast_fp16 = mul(x = var_3078_cast_fp16, y = layers_15_post_attention_layernorm_weight_to_fp16)[name = string("op_3079_cast_fp16")]; tensor layers_15_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242381376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245527168))))[name = string("layers_15_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_gate_proj_weight_to_fp16_palettized, x = var_3079_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor var_3089_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_3089_cast_fp16")]; tensor layers_15_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248673536))))[name = string("layers_15_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_up_proj_weight_to_fp16_palettized, x = var_3079_cast_fp16)[name = string("linear_110_cast_fp16")]; tensor input_159_cast_fp16 = mul(x = var_3089_cast_fp16, y = linear_110_cast_fp16)[name = string("input_159_cast_fp16")]; tensor layers_15_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248674112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251819904))))[name = string("layers_15_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_111_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_mlp_down_proj_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor x_415_cast_fp16 = add(x = x_409_cast_fp16, y = linear_111_cast_fp16)[name = string("x_415_cast_fp16")]; int32 var_3109 = const()[name = string("op_3109"), val = int32(-1)]; fp16 var_3108_promoted_to_fp16 = const()[name = string("op_3108_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3118_cast_fp16 = pow(x = x_415_cast_fp16, y = var_3108_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; tensor var_3120_axes_0 = const()[name = string("op_3120_axes_0"), val = tensor([-1])]; bool var_3120_keep_dims_0 = const()[name = string("op_3120_keep_dims_0"), val = bool(true)]; tensor var_3120_cast_fp16 = reduce_mean(axes = var_3120_axes_0, keep_dims = var_3120_keep_dims_0, x = var_3118_cast_fp16)[name = string("op_3120_cast_fp16")]; fp16 var_3121_to_fp16 = const()[name = string("op_3121_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3122_cast_fp16 = add(x = var_3120_cast_fp16, y = var_3121_to_fp16)[name = string("op_3122_cast_fp16")]; fp32 norm_129_epsilon_0 = const()[name = string("norm_129_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_129_cast_fp16 = rsqrt(epsilon = norm_129_epsilon_0, x = var_3122_cast_fp16)[name = string("norm_129_cast_fp16")]; tensor var_3124_cast_fp16 = mul(x = x_415_cast_fp16, y = norm_129_cast_fp16)[name = string("op_3124_cast_fp16")]; tensor layers_16_input_layernorm_weight_to_fp16 = const()[name = string("layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251820480)))]; tensor var_3125_cast_fp16 = mul(x = var_3124_cast_fp16, y = layers_16_input_layernorm_weight_to_fp16)[name = string("op_3125_cast_fp16")]; tensor layers_16_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253919808))))[name = string("layers_16_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_16_self_attn_q_proj_weight_to_fp16_palettized, x = var_3125_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor var_3141 = const()[name = string("op_3141"), val = tensor([1, 1, 16, 128])]; tensor var_3142_cast_fp16 = reshape(shape = var_3141, x = linear_112_cast_fp16)[name = string("op_3142_cast_fp16")]; tensor x_421_perm_0 = const()[name = string("x_421_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_16_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253920384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254969024))))[name = string("layers_16_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_k_proj_weight_to_fp16_palettized, x = var_3125_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor var_3146 = const()[name = string("op_3146"), val = tensor([1, 1, 8, 128])]; tensor var_3147_cast_fp16 = reshape(shape = var_3146, x = linear_113_cast_fp16)[name = string("op_3147_cast_fp16")]; tensor x_425_perm_0 = const()[name = string("x_425_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_16_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254969600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256018240))))[name = string("layers_16_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_v_proj_weight_to_fp16_palettized, x = var_3125_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor var_3151 = const()[name = string("op_3151"), val = tensor([1, 1, 8, 128])]; tensor var_3152_cast_fp16 = reshape(shape = var_3151, x = linear_114_cast_fp16)[name = string("op_3152_cast_fp16")]; tensor v_33_perm_0 = const()[name = string("v_33_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_3108_promoted_1_to_fp16 = const()[name = string("op_3108_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_421_cast_fp16 = transpose(perm = x_421_perm_0, x = var_3142_cast_fp16)[name = string("transpose_47")]; tensor var_3156_cast_fp16 = pow(x = x_421_cast_fp16, y = var_3108_promoted_1_to_fp16)[name = string("op_3156_cast_fp16")]; tensor var_3158_axes_0 = const()[name = string("op_3158_axes_0"), val = tensor([-1])]; bool var_3158_keep_dims_0 = const()[name = string("op_3158_keep_dims_0"), val = bool(true)]; tensor var_3158_cast_fp16 = reduce_mean(axes = var_3158_axes_0, keep_dims = var_3158_keep_dims_0, x = var_3156_cast_fp16)[name = string("op_3158_cast_fp16")]; fp16 var_3159_to_fp16 = const()[name = string("op_3159_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3160_cast_fp16 = add(x = var_3158_cast_fp16, y = var_3159_to_fp16)[name = string("op_3160_cast_fp16")]; fp32 norm_131_epsilon_0 = const()[name = string("norm_131_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_131_cast_fp16 = rsqrt(epsilon = norm_131_epsilon_0, x = var_3160_cast_fp16)[name = string("norm_131_cast_fp16")]; tensor var_3162_cast_fp16 = mul(x = x_421_cast_fp16, y = norm_131_cast_fp16)[name = string("op_3162_cast_fp16")]; tensor layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256018816)))]; tensor var_3163_cast_fp16 = mul(x = var_3162_cast_fp16, y = layers_16_self_attn_q_norm_weight_to_fp16)[name = string("op_3163_cast_fp16")]; fp16 var_3108_promoted_2_to_fp16 = const()[name = string("op_3108_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_425_cast_fp16 = transpose(perm = x_425_perm_0, x = var_3147_cast_fp16)[name = string("transpose_46")]; tensor var_3167_cast_fp16 = pow(x = x_425_cast_fp16, y = var_3108_promoted_2_to_fp16)[name = string("op_3167_cast_fp16")]; tensor var_3169_axes_0 = const()[name = string("op_3169_axes_0"), val = tensor([-1])]; bool var_3169_keep_dims_0 = const()[name = string("op_3169_keep_dims_0"), val = bool(true)]; tensor var_3169_cast_fp16 = reduce_mean(axes = var_3169_axes_0, keep_dims = var_3169_keep_dims_0, x = var_3167_cast_fp16)[name = string("op_3169_cast_fp16")]; fp16 var_3170_to_fp16 = const()[name = string("op_3170_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3171_cast_fp16 = add(x = var_3169_cast_fp16, y = var_3170_to_fp16)[name = string("op_3171_cast_fp16")]; fp32 norm_133_epsilon_0 = const()[name = string("norm_133_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_133_cast_fp16 = rsqrt(epsilon = norm_133_epsilon_0, x = var_3171_cast_fp16)[name = string("norm_133_cast_fp16")]; tensor var_3173_cast_fp16 = mul(x = x_425_cast_fp16, y = norm_133_cast_fp16)[name = string("op_3173_cast_fp16")]; tensor layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256019136)))]; tensor var_3174_cast_fp16 = mul(x = var_3173_cast_fp16, y = layers_16_self_attn_k_norm_weight_to_fp16)[name = string("op_3174_cast_fp16")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = var_3163_cast_fp16)[name = string("x1_65_cast_fp16")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = var_3163_cast_fp16)[name = string("x2_65_cast_fp16")]; tensor var_3192_cast_fp16 = mul(x = x1_65_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3192_cast_fp16")]; tensor var_3193_cast_fp16 = mul(x = x2_65_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3193_cast_fp16")]; tensor var_3194_cast_fp16 = sub(x = var_3192_cast_fp16, y = var_3193_cast_fp16)[name = string("op_3194_cast_fp16")]; tensor var_3195_cast_fp16 = mul(x = x2_65_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3195_cast_fp16")]; tensor var_3196_cast_fp16 = mul(x = x1_65_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3196_cast_fp16")]; tensor var_3197_cast_fp16 = add(x = var_3195_cast_fp16, y = var_3196_cast_fp16)[name = string("op_3197_cast_fp16")]; bool q_33_interleave_0 = const()[name = string("q_33_interleave_0"), val = bool(false)]; tensor q_33_cast_fp16 = concat(axis = var_3109, interleave = q_33_interleave_0, values = (var_3194_cast_fp16, var_3197_cast_fp16))[name = string("q_33_cast_fp16")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = var_3174_cast_fp16)[name = string("x1_67_cast_fp16")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = var_3174_cast_fp16)[name = string("x2_67_cast_fp16")]; tensor var_3216_cast_fp16 = mul(x = x1_67_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3216_cast_fp16")]; tensor var_3217_cast_fp16 = mul(x = x2_67_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3217_cast_fp16")]; tensor var_3218_cast_fp16 = sub(x = var_3216_cast_fp16, y = var_3217_cast_fp16)[name = string("op_3218_cast_fp16")]; tensor var_3219_cast_fp16 = mul(x = x2_67_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3219_cast_fp16")]; tensor var_3220_cast_fp16 = mul(x = x1_67_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3220_cast_fp16")]; tensor var_3221_cast_fp16 = add(x = var_3219_cast_fp16, y = var_3220_cast_fp16)[name = string("op_3221_cast_fp16")]; bool k_33_interleave_0 = const()[name = string("k_33_interleave_0"), val = bool(false)]; tensor k_33_cast_fp16 = concat(axis = var_3109, interleave = k_33_interleave_0, values = (var_3218_cast_fp16, var_3221_cast_fp16))[name = string("k_33_cast_fp16")]; tensor read_state_32 = read_state(input = k_cache_16)[name = string("read_state_32")]; tensor k_cache_99_cast_fp16 = mul(x = read_state_32, y = var_264_cast_fp16)[name = string("k_cache_99_cast_fp16")]; write_state(data = k_cache_99_cast_fp16, input = k_cache_16)[name = string("coreml_update_state_176_write_state")]; tensor coreml_update_state_176 = read_state(input = k_cache_16)[name = string("coreml_update_state_176")]; tensor var_3226_cast_fp16 = mul(x = k_33_cast_fp16, y = onehot_cast_fp16)[name = string("op_3226_cast_fp16")]; tensor k_cache_101_cast_fp16 = add(x = coreml_update_state_176, y = var_3226_cast_fp16)[name = string("k_cache_101_cast_fp16")]; write_state(data = k_cache_101_cast_fp16, input = k_cache_16)[name = string("coreml_update_state_177_write_state")]; tensor coreml_update_state_177 = read_state(input = k_cache_16)[name = string("coreml_update_state_177")]; tensor read_state_33 = read_state(input = v_cache_16)[name = string("read_state_33")]; tensor v_cache_99_cast_fp16 = mul(x = read_state_33, y = var_264_cast_fp16)[name = string("v_cache_99_cast_fp16")]; write_state(data = v_cache_99_cast_fp16, input = v_cache_16)[name = string("coreml_update_state_178_write_state")]; tensor coreml_update_state_178 = read_state(input = v_cache_16)[name = string("coreml_update_state_178")]; tensor v_33_cast_fp16 = transpose(perm = v_33_perm_0, x = var_3152_cast_fp16)[name = string("transpose_45")]; tensor var_3230_cast_fp16 = mul(x = v_33_cast_fp16, y = onehot_cast_fp16)[name = string("op_3230_cast_fp16")]; tensor v_cache_101_cast_fp16 = add(x = coreml_update_state_178, y = var_3230_cast_fp16)[name = string("v_cache_101_cast_fp16")]; write_state(data = v_cache_101_cast_fp16, input = v_cache_16)[name = string("coreml_update_state_179_write_state")]; tensor coreml_update_state_179 = read_state(input = v_cache_16)[name = string("coreml_update_state_179")]; tensor var_3232_axes_0 = const()[name = string("op_3232_axes_0"), val = tensor([2])]; tensor var_3232_cast_fp16 = expand_dims(axes = var_3232_axes_0, x = coreml_update_state_177)[name = string("op_3232_cast_fp16")]; tensor k_exp_65_reps_0 = const()[name = string("k_exp_65_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_65_cast_fp16 = tile(reps = k_exp_65_reps_0, x = var_3232_cast_fp16)[name = string("k_exp_65_cast_fp16")]; tensor var_3235 = const()[name = string("op_3235"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_67_cast_fp16 = reshape(shape = var_3235, x = k_exp_65_cast_fp16)[name = string("k_exp_67_cast_fp16")]; tensor var_3237_axes_0 = const()[name = string("op_3237_axes_0"), val = tensor([2])]; tensor var_3237_cast_fp16 = expand_dims(axes = var_3237_axes_0, x = coreml_update_state_179)[name = string("op_3237_cast_fp16")]; tensor v_exp_65_reps_0 = const()[name = string("v_exp_65_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_65_cast_fp16 = tile(reps = v_exp_65_reps_0, x = var_3237_cast_fp16)[name = string("v_exp_65_cast_fp16")]; tensor var_3240 = const()[name = string("op_3240"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_67_cast_fp16 = reshape(shape = var_3240, x = v_exp_65_cast_fp16)[name = string("v_exp_67_cast_fp16")]; bool var_3243_transpose_x_1 = const()[name = string("op_3243_transpose_x_1"), val = bool(false)]; bool var_3243_transpose_y_1 = const()[name = string("op_3243_transpose_y_1"), val = bool(true)]; tensor var_3243_cast_fp16 = matmul(transpose_x = var_3243_transpose_x_1, transpose_y = var_3243_transpose_y_1, x = q_33_cast_fp16, y = k_exp_67_cast_fp16)[name = string("op_3243_cast_fp16")]; fp16 var_3244_to_fp16 = const()[name = string("op_3244_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_65_cast_fp16 = mul(x = var_3243_cast_fp16, y = var_3244_to_fp16)[name = string("attn_65_cast_fp16")]; tensor input_161_cast_fp16 = add(x = attn_65_cast_fp16, y = attention_mask_to_fp16)[name = string("input_161_cast_fp16")]; tensor attn_67_cast_fp16 = softmax(axis = var_3109, x = input_161_cast_fp16)[name = string("attn_67_cast_fp16")]; bool out_33_transpose_x_0 = const()[name = string("out_33_transpose_x_0"), val = bool(false)]; bool out_33_transpose_y_0 = const()[name = string("out_33_transpose_y_0"), val = bool(false)]; tensor out_33_cast_fp16 = matmul(transpose_x = out_33_transpose_x_0, transpose_y = out_33_transpose_y_0, x = attn_67_cast_fp16, y = v_exp_67_cast_fp16)[name = string("out_33_cast_fp16")]; tensor var_3249_perm_0 = const()[name = string("op_3249_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3250 = const()[name = string("op_3250"), val = tensor([1, 1, -1])]; tensor var_3249_cast_fp16 = transpose(perm = var_3249_perm_0, x = out_33_cast_fp16)[name = string("transpose_44")]; tensor input_163_cast_fp16 = reshape(shape = var_3250, x = var_3249_cast_fp16)[name = string("input_163_cast_fp16")]; tensor layers_16_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256019456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258116672))))[name = string("layers_16_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_o_proj_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor x_435_cast_fp16 = add(x = x_415_cast_fp16, y = linear_115_cast_fp16)[name = string("x_435_cast_fp16")]; fp16 var_3108_promoted_3_to_fp16 = const()[name = string("op_3108_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3257_cast_fp16 = pow(x = x_435_cast_fp16, y = var_3108_promoted_3_to_fp16)[name = string("op_3257_cast_fp16")]; tensor var_3259_axes_0 = const()[name = string("op_3259_axes_0"), val = tensor([-1])]; bool var_3259_keep_dims_0 = const()[name = string("op_3259_keep_dims_0"), val = bool(true)]; tensor var_3259_cast_fp16 = reduce_mean(axes = var_3259_axes_0, keep_dims = var_3259_keep_dims_0, x = var_3257_cast_fp16)[name = string("op_3259_cast_fp16")]; fp16 var_3260_to_fp16 = const()[name = string("op_3260_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3261_cast_fp16 = add(x = var_3259_cast_fp16, y = var_3260_to_fp16)[name = string("op_3261_cast_fp16")]; fp32 norm_135_epsilon_0 = const()[name = string("norm_135_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_135_cast_fp16 = rsqrt(epsilon = norm_135_epsilon_0, x = var_3261_cast_fp16)[name = string("norm_135_cast_fp16")]; tensor var_3263_cast_fp16 = mul(x = x_435_cast_fp16, y = norm_135_cast_fp16)[name = string("op_3263_cast_fp16")]; tensor layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258117248)))]; tensor var_3264_cast_fp16 = mul(x = var_3263_cast_fp16, y = layers_16_post_attention_layernorm_weight_to_fp16)[name = string("op_3264_cast_fp16")]; tensor layers_16_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258119360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261265152))))[name = string("layers_16_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_gate_proj_weight_to_fp16_palettized, x = var_3264_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor var_3274_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_3274_cast_fp16")]; tensor layers_16_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261265728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264411520))))[name = string("layers_16_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_up_proj_weight_to_fp16_palettized, x = var_3264_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor input_169_cast_fp16 = mul(x = var_3274_cast_fp16, y = linear_117_cast_fp16)[name = string("input_169_cast_fp16")]; tensor layers_16_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264412096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267557888))))[name = string("layers_16_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_mlp_down_proj_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = string("linear_118_cast_fp16")]; tensor x_441_cast_fp16 = add(x = x_435_cast_fp16, y = linear_118_cast_fp16)[name = string("x_441_cast_fp16")]; int32 var_3294 = const()[name = string("op_3294"), val = int32(-1)]; fp16 var_3293_promoted_to_fp16 = const()[name = string("op_3293_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3303_cast_fp16 = pow(x = x_441_cast_fp16, y = var_3293_promoted_to_fp16)[name = string("op_3303_cast_fp16")]; tensor var_3305_axes_0 = const()[name = string("op_3305_axes_0"), val = tensor([-1])]; bool var_3305_keep_dims_0 = const()[name = string("op_3305_keep_dims_0"), val = bool(true)]; tensor var_3305_cast_fp16 = reduce_mean(axes = var_3305_axes_0, keep_dims = var_3305_keep_dims_0, x = var_3303_cast_fp16)[name = string("op_3305_cast_fp16")]; fp16 var_3306_to_fp16 = const()[name = string("op_3306_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3307_cast_fp16 = add(x = var_3305_cast_fp16, y = var_3306_to_fp16)[name = string("op_3307_cast_fp16")]; fp32 norm_137_epsilon_0 = const()[name = string("norm_137_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_137_cast_fp16 = rsqrt(epsilon = norm_137_epsilon_0, x = var_3307_cast_fp16)[name = string("norm_137_cast_fp16")]; tensor var_3309_cast_fp16 = mul(x = x_441_cast_fp16, y = norm_137_cast_fp16)[name = string("op_3309_cast_fp16")]; tensor layers_17_input_layernorm_weight_to_fp16 = const()[name = string("layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267558464)))]; tensor var_3310_cast_fp16 = mul(x = var_3309_cast_fp16, y = layers_17_input_layernorm_weight_to_fp16)[name = string("op_3310_cast_fp16")]; tensor layers_17_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267560576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269657792))))[name = string("layers_17_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_17_self_attn_q_proj_weight_to_fp16_palettized, x = var_3310_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor var_3326 = const()[name = string("op_3326"), val = tensor([1, 1, 16, 128])]; tensor var_3327_cast_fp16 = reshape(shape = var_3326, x = linear_119_cast_fp16)[name = string("op_3327_cast_fp16")]; tensor x_447_perm_0 = const()[name = string("x_447_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_17_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269658368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270707008))))[name = string("layers_17_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_k_proj_weight_to_fp16_palettized, x = var_3310_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor var_3331 = const()[name = string("op_3331"), val = tensor([1, 1, 8, 128])]; tensor var_3332_cast_fp16 = reshape(shape = var_3331, x = linear_120_cast_fp16)[name = string("op_3332_cast_fp16")]; tensor x_451_perm_0 = const()[name = string("x_451_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_17_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270707584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271756224))))[name = string("layers_17_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_v_proj_weight_to_fp16_palettized, x = var_3310_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor var_3336 = const()[name = string("op_3336"), val = tensor([1, 1, 8, 128])]; tensor var_3337_cast_fp16 = reshape(shape = var_3336, x = linear_121_cast_fp16)[name = string("op_3337_cast_fp16")]; tensor v_35_perm_0 = const()[name = string("v_35_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_3293_promoted_1_to_fp16 = const()[name = string("op_3293_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_447_cast_fp16 = transpose(perm = x_447_perm_0, x = var_3327_cast_fp16)[name = string("transpose_43")]; tensor var_3341_cast_fp16 = pow(x = x_447_cast_fp16, y = var_3293_promoted_1_to_fp16)[name = string("op_3341_cast_fp16")]; tensor var_3343_axes_0 = const()[name = string("op_3343_axes_0"), val = tensor([-1])]; bool var_3343_keep_dims_0 = const()[name = string("op_3343_keep_dims_0"), val = bool(true)]; tensor var_3343_cast_fp16 = reduce_mean(axes = var_3343_axes_0, keep_dims = var_3343_keep_dims_0, x = var_3341_cast_fp16)[name = string("op_3343_cast_fp16")]; fp16 var_3344_to_fp16 = const()[name = string("op_3344_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3345_cast_fp16 = add(x = var_3343_cast_fp16, y = var_3344_to_fp16)[name = string("op_3345_cast_fp16")]; fp32 norm_139_epsilon_0 = const()[name = string("norm_139_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_139_cast_fp16 = rsqrt(epsilon = norm_139_epsilon_0, x = var_3345_cast_fp16)[name = string("norm_139_cast_fp16")]; tensor var_3347_cast_fp16 = mul(x = x_447_cast_fp16, y = norm_139_cast_fp16)[name = string("op_3347_cast_fp16")]; tensor layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271756800)))]; tensor var_3348_cast_fp16 = mul(x = var_3347_cast_fp16, y = layers_17_self_attn_q_norm_weight_to_fp16)[name = string("op_3348_cast_fp16")]; fp16 var_3293_promoted_2_to_fp16 = const()[name = string("op_3293_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_451_cast_fp16 = transpose(perm = x_451_perm_0, x = var_3332_cast_fp16)[name = string("transpose_42")]; tensor var_3352_cast_fp16 = pow(x = x_451_cast_fp16, y = var_3293_promoted_2_to_fp16)[name = string("op_3352_cast_fp16")]; tensor var_3354_axes_0 = const()[name = string("op_3354_axes_0"), val = tensor([-1])]; bool var_3354_keep_dims_0 = const()[name = string("op_3354_keep_dims_0"), val = bool(true)]; tensor var_3354_cast_fp16 = reduce_mean(axes = var_3354_axes_0, keep_dims = var_3354_keep_dims_0, x = var_3352_cast_fp16)[name = string("op_3354_cast_fp16")]; fp16 var_3355_to_fp16 = const()[name = string("op_3355_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3356_cast_fp16 = add(x = var_3354_cast_fp16, y = var_3355_to_fp16)[name = string("op_3356_cast_fp16")]; fp32 norm_141_epsilon_0 = const()[name = string("norm_141_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_141_cast_fp16 = rsqrt(epsilon = norm_141_epsilon_0, x = var_3356_cast_fp16)[name = string("norm_141_cast_fp16")]; tensor var_3358_cast_fp16 = mul(x = x_451_cast_fp16, y = norm_141_cast_fp16)[name = string("op_3358_cast_fp16")]; tensor layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271757120)))]; tensor var_3359_cast_fp16 = mul(x = var_3358_cast_fp16, y = layers_17_self_attn_k_norm_weight_to_fp16)[name = string("op_3359_cast_fp16")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = var_3348_cast_fp16)[name = string("x1_69_cast_fp16")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = var_3348_cast_fp16)[name = string("x2_69_cast_fp16")]; tensor var_3377_cast_fp16 = mul(x = x1_69_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3377_cast_fp16")]; tensor var_3378_cast_fp16 = mul(x = x2_69_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3378_cast_fp16")]; tensor var_3379_cast_fp16 = sub(x = var_3377_cast_fp16, y = var_3378_cast_fp16)[name = string("op_3379_cast_fp16")]; tensor var_3380_cast_fp16 = mul(x = x2_69_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3380_cast_fp16")]; tensor var_3381_cast_fp16 = mul(x = x1_69_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3381_cast_fp16")]; tensor var_3382_cast_fp16 = add(x = var_3380_cast_fp16, y = var_3381_cast_fp16)[name = string("op_3382_cast_fp16")]; bool q_35_interleave_0 = const()[name = string("q_35_interleave_0"), val = bool(false)]; tensor q_35_cast_fp16 = concat(axis = var_3294, interleave = q_35_interleave_0, values = (var_3379_cast_fp16, var_3382_cast_fp16))[name = string("q_35_cast_fp16")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = var_3359_cast_fp16)[name = string("x1_71_cast_fp16")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = var_3359_cast_fp16)[name = string("x2_71_cast_fp16")]; tensor var_3401_cast_fp16 = mul(x = x1_71_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3401_cast_fp16")]; tensor var_3402_cast_fp16 = mul(x = x2_71_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3402_cast_fp16")]; tensor var_3403_cast_fp16 = sub(x = var_3401_cast_fp16, y = var_3402_cast_fp16)[name = string("op_3403_cast_fp16")]; tensor var_3404_cast_fp16 = mul(x = x2_71_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3404_cast_fp16")]; tensor var_3405_cast_fp16 = mul(x = x1_71_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3405_cast_fp16")]; tensor var_3406_cast_fp16 = add(x = var_3404_cast_fp16, y = var_3405_cast_fp16)[name = string("op_3406_cast_fp16")]; bool k_35_interleave_0 = const()[name = string("k_35_interleave_0"), val = bool(false)]; tensor k_35_cast_fp16 = concat(axis = var_3294, interleave = k_35_interleave_0, values = (var_3403_cast_fp16, var_3406_cast_fp16))[name = string("k_35_cast_fp16")]; tensor read_state_34 = read_state(input = k_cache_17)[name = string("read_state_34")]; tensor k_cache_105_cast_fp16 = mul(x = read_state_34, y = var_264_cast_fp16)[name = string("k_cache_105_cast_fp16")]; write_state(data = k_cache_105_cast_fp16, input = k_cache_17)[name = string("coreml_update_state_180_write_state")]; tensor coreml_update_state_180 = read_state(input = k_cache_17)[name = string("coreml_update_state_180")]; tensor var_3411_cast_fp16 = mul(x = k_35_cast_fp16, y = onehot_cast_fp16)[name = string("op_3411_cast_fp16")]; tensor k_cache_107_cast_fp16 = add(x = coreml_update_state_180, y = var_3411_cast_fp16)[name = string("k_cache_107_cast_fp16")]; write_state(data = k_cache_107_cast_fp16, input = k_cache_17)[name = string("coreml_update_state_181_write_state")]; tensor coreml_update_state_181 = read_state(input = k_cache_17)[name = string("coreml_update_state_181")]; tensor read_state_35 = read_state(input = v_cache_17)[name = string("read_state_35")]; tensor v_cache_105_cast_fp16 = mul(x = read_state_35, y = var_264_cast_fp16)[name = string("v_cache_105_cast_fp16")]; write_state(data = v_cache_105_cast_fp16, input = v_cache_17)[name = string("coreml_update_state_182_write_state")]; tensor coreml_update_state_182 = read_state(input = v_cache_17)[name = string("coreml_update_state_182")]; tensor v_35_cast_fp16 = transpose(perm = v_35_perm_0, x = var_3337_cast_fp16)[name = string("transpose_41")]; tensor var_3415_cast_fp16 = mul(x = v_35_cast_fp16, y = onehot_cast_fp16)[name = string("op_3415_cast_fp16")]; tensor v_cache_107_cast_fp16 = add(x = coreml_update_state_182, y = var_3415_cast_fp16)[name = string("v_cache_107_cast_fp16")]; write_state(data = v_cache_107_cast_fp16, input = v_cache_17)[name = string("coreml_update_state_183_write_state")]; tensor coreml_update_state_183 = read_state(input = v_cache_17)[name = string("coreml_update_state_183")]; tensor var_3417_axes_0 = const()[name = string("op_3417_axes_0"), val = tensor([2])]; tensor var_3417_cast_fp16 = expand_dims(axes = var_3417_axes_0, x = coreml_update_state_181)[name = string("op_3417_cast_fp16")]; tensor k_exp_69_reps_0 = const()[name = string("k_exp_69_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_69_cast_fp16 = tile(reps = k_exp_69_reps_0, x = var_3417_cast_fp16)[name = string("k_exp_69_cast_fp16")]; tensor var_3420 = const()[name = string("op_3420"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_71_cast_fp16 = reshape(shape = var_3420, x = k_exp_69_cast_fp16)[name = string("k_exp_71_cast_fp16")]; tensor var_3422_axes_0 = const()[name = string("op_3422_axes_0"), val = tensor([2])]; tensor var_3422_cast_fp16 = expand_dims(axes = var_3422_axes_0, x = coreml_update_state_183)[name = string("op_3422_cast_fp16")]; tensor v_exp_69_reps_0 = const()[name = string("v_exp_69_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_69_cast_fp16 = tile(reps = v_exp_69_reps_0, x = var_3422_cast_fp16)[name = string("v_exp_69_cast_fp16")]; tensor var_3425 = const()[name = string("op_3425"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_71_cast_fp16 = reshape(shape = var_3425, x = v_exp_69_cast_fp16)[name = string("v_exp_71_cast_fp16")]; bool var_3428_transpose_x_1 = const()[name = string("op_3428_transpose_x_1"), val = bool(false)]; bool var_3428_transpose_y_1 = const()[name = string("op_3428_transpose_y_1"), val = bool(true)]; tensor var_3428_cast_fp16 = matmul(transpose_x = var_3428_transpose_x_1, transpose_y = var_3428_transpose_y_1, x = q_35_cast_fp16, y = k_exp_71_cast_fp16)[name = string("op_3428_cast_fp16")]; fp16 var_3429_to_fp16 = const()[name = string("op_3429_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_69_cast_fp16 = mul(x = var_3428_cast_fp16, y = var_3429_to_fp16)[name = string("attn_69_cast_fp16")]; tensor input_171_cast_fp16 = add(x = attn_69_cast_fp16, y = attention_mask_to_fp16)[name = string("input_171_cast_fp16")]; tensor attn_71_cast_fp16 = softmax(axis = var_3294, x = input_171_cast_fp16)[name = string("attn_71_cast_fp16")]; bool out_35_transpose_x_0 = const()[name = string("out_35_transpose_x_0"), val = bool(false)]; bool out_35_transpose_y_0 = const()[name = string("out_35_transpose_y_0"), val = bool(false)]; tensor out_35_cast_fp16 = matmul(transpose_x = out_35_transpose_x_0, transpose_y = out_35_transpose_y_0, x = attn_71_cast_fp16, y = v_exp_71_cast_fp16)[name = string("out_35_cast_fp16")]; tensor var_3434_perm_0 = const()[name = string("op_3434_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3435 = const()[name = string("op_3435"), val = tensor([1, 1, -1])]; tensor var_3434_cast_fp16 = transpose(perm = var_3434_perm_0, x = out_35_cast_fp16)[name = string("transpose_40")]; tensor input_173_cast_fp16 = reshape(shape = var_3435, x = var_3434_cast_fp16)[name = string("input_173_cast_fp16")]; tensor layers_17_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271757440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273854656))))[name = string("layers_17_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_122_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_o_proj_weight_to_fp16_palettized, x = input_173_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor x_461_cast_fp16 = add(x = x_441_cast_fp16, y = linear_122_cast_fp16)[name = string("x_461_cast_fp16")]; fp16 var_3293_promoted_3_to_fp16 = const()[name = string("op_3293_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3442_cast_fp16 = pow(x = x_461_cast_fp16, y = var_3293_promoted_3_to_fp16)[name = string("op_3442_cast_fp16")]; tensor var_3444_axes_0 = const()[name = string("op_3444_axes_0"), val = tensor([-1])]; bool var_3444_keep_dims_0 = const()[name = string("op_3444_keep_dims_0"), val = bool(true)]; tensor var_3444_cast_fp16 = reduce_mean(axes = var_3444_axes_0, keep_dims = var_3444_keep_dims_0, x = var_3442_cast_fp16)[name = string("op_3444_cast_fp16")]; fp16 var_3445_to_fp16 = const()[name = string("op_3445_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3446_cast_fp16 = add(x = var_3444_cast_fp16, y = var_3445_to_fp16)[name = string("op_3446_cast_fp16")]; fp32 norm_143_epsilon_0 = const()[name = string("norm_143_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_143_cast_fp16 = rsqrt(epsilon = norm_143_epsilon_0, x = var_3446_cast_fp16)[name = string("norm_143_cast_fp16")]; tensor var_3448_cast_fp16 = mul(x = x_461_cast_fp16, y = norm_143_cast_fp16)[name = string("op_3448_cast_fp16")]; tensor layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273855232)))]; tensor var_3449_cast_fp16 = mul(x = var_3448_cast_fp16, y = layers_17_post_attention_layernorm_weight_to_fp16)[name = string("op_3449_cast_fp16")]; tensor layers_17_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273857344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277003136))))[name = string("layers_17_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_gate_proj_weight_to_fp16_palettized, x = var_3449_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor var_3459_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor layers_17_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277003712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280149504))))[name = string("layers_17_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_up_proj_weight_to_fp16_palettized, x = var_3449_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor input_179_cast_fp16 = mul(x = var_3459_cast_fp16, y = linear_124_cast_fp16)[name = string("input_179_cast_fp16")]; tensor layers_17_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280150080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283295872))))[name = string("layers_17_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_mlp_down_proj_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor x_467_cast_fp16 = add(x = x_461_cast_fp16, y = linear_125_cast_fp16)[name = string("x_467_cast_fp16")]; int32 var_3479 = const()[name = string("op_3479"), val = int32(-1)]; fp16 var_3478_promoted_to_fp16 = const()[name = string("op_3478_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3488_cast_fp16 = pow(x = x_467_cast_fp16, y = var_3478_promoted_to_fp16)[name = string("op_3488_cast_fp16")]; tensor var_3490_axes_0 = const()[name = string("op_3490_axes_0"), val = tensor([-1])]; bool var_3490_keep_dims_0 = const()[name = string("op_3490_keep_dims_0"), val = bool(true)]; tensor var_3490_cast_fp16 = reduce_mean(axes = var_3490_axes_0, keep_dims = var_3490_keep_dims_0, x = var_3488_cast_fp16)[name = string("op_3490_cast_fp16")]; fp16 var_3491_to_fp16 = const()[name = string("op_3491_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3492_cast_fp16 = add(x = var_3490_cast_fp16, y = var_3491_to_fp16)[name = string("op_3492_cast_fp16")]; fp32 norm_145_epsilon_0 = const()[name = string("norm_145_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_145_cast_fp16 = rsqrt(epsilon = norm_145_epsilon_0, x = var_3492_cast_fp16)[name = string("norm_145_cast_fp16")]; tensor var_3494_cast_fp16 = mul(x = x_467_cast_fp16, y = norm_145_cast_fp16)[name = string("op_3494_cast_fp16")]; tensor layers_18_input_layernorm_weight_to_fp16 = const()[name = string("layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283296448)))]; tensor var_3495_cast_fp16 = mul(x = var_3494_cast_fp16, y = layers_18_input_layernorm_weight_to_fp16)[name = string("op_3495_cast_fp16")]; tensor layers_18_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283298560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285395776))))[name = string("layers_18_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_18_self_attn_q_proj_weight_to_fp16_palettized, x = var_3495_cast_fp16)[name = string("linear_126_cast_fp16")]; tensor var_3511 = const()[name = string("op_3511"), val = tensor([1, 1, 16, 128])]; tensor var_3512_cast_fp16 = reshape(shape = var_3511, x = linear_126_cast_fp16)[name = string("op_3512_cast_fp16")]; tensor x_473_perm_0 = const()[name = string("x_473_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_18_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285396352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286444992))))[name = string("layers_18_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_k_proj_weight_to_fp16_palettized, x = var_3495_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor var_3516 = const()[name = string("op_3516"), val = tensor([1, 1, 8, 128])]; tensor var_3517_cast_fp16 = reshape(shape = var_3516, x = linear_127_cast_fp16)[name = string("op_3517_cast_fp16")]; tensor x_477_perm_0 = const()[name = string("x_477_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_18_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286445568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287494208))))[name = string("layers_18_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_v_proj_weight_to_fp16_palettized, x = var_3495_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor var_3521 = const()[name = string("op_3521"), val = tensor([1, 1, 8, 128])]; tensor var_3522_cast_fp16 = reshape(shape = var_3521, x = linear_128_cast_fp16)[name = string("op_3522_cast_fp16")]; tensor v_37_perm_0 = const()[name = string("v_37_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_3478_promoted_1_to_fp16 = const()[name = string("op_3478_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_473_cast_fp16 = transpose(perm = x_473_perm_0, x = var_3512_cast_fp16)[name = string("transpose_39")]; tensor var_3526_cast_fp16 = pow(x = x_473_cast_fp16, y = var_3478_promoted_1_to_fp16)[name = string("op_3526_cast_fp16")]; tensor var_3528_axes_0 = const()[name = string("op_3528_axes_0"), val = tensor([-1])]; bool var_3528_keep_dims_0 = const()[name = string("op_3528_keep_dims_0"), val = bool(true)]; tensor var_3528_cast_fp16 = reduce_mean(axes = var_3528_axes_0, keep_dims = var_3528_keep_dims_0, x = var_3526_cast_fp16)[name = string("op_3528_cast_fp16")]; fp16 var_3529_to_fp16 = const()[name = string("op_3529_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3530_cast_fp16 = add(x = var_3528_cast_fp16, y = var_3529_to_fp16)[name = string("op_3530_cast_fp16")]; fp32 norm_147_epsilon_0 = const()[name = string("norm_147_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_147_cast_fp16 = rsqrt(epsilon = norm_147_epsilon_0, x = var_3530_cast_fp16)[name = string("norm_147_cast_fp16")]; tensor var_3532_cast_fp16 = mul(x = x_473_cast_fp16, y = norm_147_cast_fp16)[name = string("op_3532_cast_fp16")]; tensor layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287494784)))]; tensor var_3533_cast_fp16 = mul(x = var_3532_cast_fp16, y = layers_18_self_attn_q_norm_weight_to_fp16)[name = string("op_3533_cast_fp16")]; fp16 var_3478_promoted_2_to_fp16 = const()[name = string("op_3478_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_477_cast_fp16 = transpose(perm = x_477_perm_0, x = var_3517_cast_fp16)[name = string("transpose_38")]; tensor var_3537_cast_fp16 = pow(x = x_477_cast_fp16, y = var_3478_promoted_2_to_fp16)[name = string("op_3537_cast_fp16")]; tensor var_3539_axes_0 = const()[name = string("op_3539_axes_0"), val = tensor([-1])]; bool var_3539_keep_dims_0 = const()[name = string("op_3539_keep_dims_0"), val = bool(true)]; tensor var_3539_cast_fp16 = reduce_mean(axes = var_3539_axes_0, keep_dims = var_3539_keep_dims_0, x = var_3537_cast_fp16)[name = string("op_3539_cast_fp16")]; fp16 var_3540_to_fp16 = const()[name = string("op_3540_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3541_cast_fp16 = add(x = var_3539_cast_fp16, y = var_3540_to_fp16)[name = string("op_3541_cast_fp16")]; fp32 norm_149_epsilon_0 = const()[name = string("norm_149_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_149_cast_fp16 = rsqrt(epsilon = norm_149_epsilon_0, x = var_3541_cast_fp16)[name = string("norm_149_cast_fp16")]; tensor var_3543_cast_fp16 = mul(x = x_477_cast_fp16, y = norm_149_cast_fp16)[name = string("op_3543_cast_fp16")]; tensor layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287495104)))]; tensor var_3544_cast_fp16 = mul(x = var_3543_cast_fp16, y = layers_18_self_attn_k_norm_weight_to_fp16)[name = string("op_3544_cast_fp16")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = var_3533_cast_fp16)[name = string("x1_73_cast_fp16")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = var_3533_cast_fp16)[name = string("x2_73_cast_fp16")]; tensor var_3562_cast_fp16 = mul(x = x1_73_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3562_cast_fp16")]; tensor var_3563_cast_fp16 = mul(x = x2_73_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3563_cast_fp16")]; tensor var_3564_cast_fp16 = sub(x = var_3562_cast_fp16, y = var_3563_cast_fp16)[name = string("op_3564_cast_fp16")]; tensor var_3565_cast_fp16 = mul(x = x2_73_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3565_cast_fp16")]; tensor var_3566_cast_fp16 = mul(x = x1_73_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3566_cast_fp16")]; tensor var_3567_cast_fp16 = add(x = var_3565_cast_fp16, y = var_3566_cast_fp16)[name = string("op_3567_cast_fp16")]; bool q_37_interleave_0 = const()[name = string("q_37_interleave_0"), val = bool(false)]; tensor q_37_cast_fp16 = concat(axis = var_3479, interleave = q_37_interleave_0, values = (var_3564_cast_fp16, var_3567_cast_fp16))[name = string("q_37_cast_fp16")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = var_3544_cast_fp16)[name = string("x1_75_cast_fp16")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = var_3544_cast_fp16)[name = string("x2_75_cast_fp16")]; tensor var_3586_cast_fp16 = mul(x = x1_75_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3586_cast_fp16")]; tensor var_3587_cast_fp16 = mul(x = x2_75_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3587_cast_fp16")]; tensor var_3588_cast_fp16 = sub(x = var_3586_cast_fp16, y = var_3587_cast_fp16)[name = string("op_3588_cast_fp16")]; tensor var_3589_cast_fp16 = mul(x = x2_75_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3589_cast_fp16")]; tensor var_3590_cast_fp16 = mul(x = x1_75_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3590_cast_fp16")]; tensor var_3591_cast_fp16 = add(x = var_3589_cast_fp16, y = var_3590_cast_fp16)[name = string("op_3591_cast_fp16")]; bool k_37_interleave_0 = const()[name = string("k_37_interleave_0"), val = bool(false)]; tensor k_37_cast_fp16 = concat(axis = var_3479, interleave = k_37_interleave_0, values = (var_3588_cast_fp16, var_3591_cast_fp16))[name = string("k_37_cast_fp16")]; tensor read_state_36 = read_state(input = k_cache_18)[name = string("read_state_36")]; tensor k_cache_111_cast_fp16 = mul(x = read_state_36, y = var_264_cast_fp16)[name = string("k_cache_111_cast_fp16")]; write_state(data = k_cache_111_cast_fp16, input = k_cache_18)[name = string("coreml_update_state_184_write_state")]; tensor coreml_update_state_184 = read_state(input = k_cache_18)[name = string("coreml_update_state_184")]; tensor var_3596_cast_fp16 = mul(x = k_37_cast_fp16, y = onehot_cast_fp16)[name = string("op_3596_cast_fp16")]; tensor k_cache_113_cast_fp16 = add(x = coreml_update_state_184, y = var_3596_cast_fp16)[name = string("k_cache_113_cast_fp16")]; write_state(data = k_cache_113_cast_fp16, input = k_cache_18)[name = string("coreml_update_state_185_write_state")]; tensor coreml_update_state_185 = read_state(input = k_cache_18)[name = string("coreml_update_state_185")]; tensor read_state_37 = read_state(input = v_cache_18)[name = string("read_state_37")]; tensor v_cache_111_cast_fp16 = mul(x = read_state_37, y = var_264_cast_fp16)[name = string("v_cache_111_cast_fp16")]; write_state(data = v_cache_111_cast_fp16, input = v_cache_18)[name = string("coreml_update_state_186_write_state")]; tensor coreml_update_state_186 = read_state(input = v_cache_18)[name = string("coreml_update_state_186")]; tensor v_37_cast_fp16 = transpose(perm = v_37_perm_0, x = var_3522_cast_fp16)[name = string("transpose_37")]; tensor var_3600_cast_fp16 = mul(x = v_37_cast_fp16, y = onehot_cast_fp16)[name = string("op_3600_cast_fp16")]; tensor v_cache_113_cast_fp16 = add(x = coreml_update_state_186, y = var_3600_cast_fp16)[name = string("v_cache_113_cast_fp16")]; write_state(data = v_cache_113_cast_fp16, input = v_cache_18)[name = string("coreml_update_state_187_write_state")]; tensor coreml_update_state_187 = read_state(input = v_cache_18)[name = string("coreml_update_state_187")]; tensor var_3602_axes_0 = const()[name = string("op_3602_axes_0"), val = tensor([2])]; tensor var_3602_cast_fp16 = expand_dims(axes = var_3602_axes_0, x = coreml_update_state_185)[name = string("op_3602_cast_fp16")]; tensor k_exp_73_reps_0 = const()[name = string("k_exp_73_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_73_cast_fp16 = tile(reps = k_exp_73_reps_0, x = var_3602_cast_fp16)[name = string("k_exp_73_cast_fp16")]; tensor var_3605 = const()[name = string("op_3605"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_75_cast_fp16 = reshape(shape = var_3605, x = k_exp_73_cast_fp16)[name = string("k_exp_75_cast_fp16")]; tensor var_3607_axes_0 = const()[name = string("op_3607_axes_0"), val = tensor([2])]; tensor var_3607_cast_fp16 = expand_dims(axes = var_3607_axes_0, x = coreml_update_state_187)[name = string("op_3607_cast_fp16")]; tensor v_exp_73_reps_0 = const()[name = string("v_exp_73_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_73_cast_fp16 = tile(reps = v_exp_73_reps_0, x = var_3607_cast_fp16)[name = string("v_exp_73_cast_fp16")]; tensor var_3610 = const()[name = string("op_3610"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_75_cast_fp16 = reshape(shape = var_3610, x = v_exp_73_cast_fp16)[name = string("v_exp_75_cast_fp16")]; bool var_3613_transpose_x_1 = const()[name = string("op_3613_transpose_x_1"), val = bool(false)]; bool var_3613_transpose_y_1 = const()[name = string("op_3613_transpose_y_1"), val = bool(true)]; tensor var_3613_cast_fp16 = matmul(transpose_x = var_3613_transpose_x_1, transpose_y = var_3613_transpose_y_1, x = q_37_cast_fp16, y = k_exp_75_cast_fp16)[name = string("op_3613_cast_fp16")]; fp16 var_3614_to_fp16 = const()[name = string("op_3614_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_73_cast_fp16 = mul(x = var_3613_cast_fp16, y = var_3614_to_fp16)[name = string("attn_73_cast_fp16")]; tensor input_181_cast_fp16 = add(x = attn_73_cast_fp16, y = attention_mask_to_fp16)[name = string("input_181_cast_fp16")]; tensor attn_75_cast_fp16 = softmax(axis = var_3479, x = input_181_cast_fp16)[name = string("attn_75_cast_fp16")]; bool out_37_transpose_x_0 = const()[name = string("out_37_transpose_x_0"), val = bool(false)]; bool out_37_transpose_y_0 = const()[name = string("out_37_transpose_y_0"), val = bool(false)]; tensor out_37_cast_fp16 = matmul(transpose_x = out_37_transpose_x_0, transpose_y = out_37_transpose_y_0, x = attn_75_cast_fp16, y = v_exp_75_cast_fp16)[name = string("out_37_cast_fp16")]; tensor var_3619_perm_0 = const()[name = string("op_3619_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3620 = const()[name = string("op_3620"), val = tensor([1, 1, -1])]; tensor var_3619_cast_fp16 = transpose(perm = var_3619_perm_0, x = out_37_cast_fp16)[name = string("transpose_36")]; tensor input_183_cast_fp16 = reshape(shape = var_3620, x = var_3619_cast_fp16)[name = string("input_183_cast_fp16")]; tensor layers_18_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287495424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289592640))))[name = string("layers_18_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_o_proj_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor x_487_cast_fp16 = add(x = x_467_cast_fp16, y = linear_129_cast_fp16)[name = string("x_487_cast_fp16")]; fp16 var_3478_promoted_3_to_fp16 = const()[name = string("op_3478_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3627_cast_fp16 = pow(x = x_487_cast_fp16, y = var_3478_promoted_3_to_fp16)[name = string("op_3627_cast_fp16")]; tensor var_3629_axes_0 = const()[name = string("op_3629_axes_0"), val = tensor([-1])]; bool var_3629_keep_dims_0 = const()[name = string("op_3629_keep_dims_0"), val = bool(true)]; tensor var_3629_cast_fp16 = reduce_mean(axes = var_3629_axes_0, keep_dims = var_3629_keep_dims_0, x = var_3627_cast_fp16)[name = string("op_3629_cast_fp16")]; fp16 var_3630_to_fp16 = const()[name = string("op_3630_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3631_cast_fp16 = add(x = var_3629_cast_fp16, y = var_3630_to_fp16)[name = string("op_3631_cast_fp16")]; fp32 norm_151_epsilon_0 = const()[name = string("norm_151_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_151_cast_fp16 = rsqrt(epsilon = norm_151_epsilon_0, x = var_3631_cast_fp16)[name = string("norm_151_cast_fp16")]; tensor var_3633_cast_fp16 = mul(x = x_487_cast_fp16, y = norm_151_cast_fp16)[name = string("op_3633_cast_fp16")]; tensor layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289593216)))]; tensor var_3634_cast_fp16 = mul(x = var_3633_cast_fp16, y = layers_18_post_attention_layernorm_weight_to_fp16)[name = string("op_3634_cast_fp16")]; tensor layers_18_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289595328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292741120))))[name = string("layers_18_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_gate_proj_weight_to_fp16_palettized, x = var_3634_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_3644_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_3644_cast_fp16")]; tensor layers_18_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292741696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295887488))))[name = string("layers_18_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_up_proj_weight_to_fp16_palettized, x = var_3634_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor input_189_cast_fp16 = mul(x = var_3644_cast_fp16, y = linear_131_cast_fp16)[name = string("input_189_cast_fp16")]; tensor layers_18_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299033856))))[name = string("layers_18_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_132_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_mlp_down_proj_weight_to_fp16_palettized, x = input_189_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor x_493_cast_fp16 = add(x = x_487_cast_fp16, y = linear_132_cast_fp16)[name = string("x_493_cast_fp16")]; int32 var_3664 = const()[name = string("op_3664"), val = int32(-1)]; fp16 var_3663_promoted_to_fp16 = const()[name = string("op_3663_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3673_cast_fp16 = pow(x = x_493_cast_fp16, y = var_3663_promoted_to_fp16)[name = string("op_3673_cast_fp16")]; tensor var_3675_axes_0 = const()[name = string("op_3675_axes_0"), val = tensor([-1])]; bool var_3675_keep_dims_0 = const()[name = string("op_3675_keep_dims_0"), val = bool(true)]; tensor var_3675_cast_fp16 = reduce_mean(axes = var_3675_axes_0, keep_dims = var_3675_keep_dims_0, x = var_3673_cast_fp16)[name = string("op_3675_cast_fp16")]; fp16 var_3676_to_fp16 = const()[name = string("op_3676_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3677_cast_fp16 = add(x = var_3675_cast_fp16, y = var_3676_to_fp16)[name = string("op_3677_cast_fp16")]; fp32 norm_153_epsilon_0 = const()[name = string("norm_153_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_153_cast_fp16 = rsqrt(epsilon = norm_153_epsilon_0, x = var_3677_cast_fp16)[name = string("norm_153_cast_fp16")]; tensor var_3679_cast_fp16 = mul(x = x_493_cast_fp16, y = norm_153_cast_fp16)[name = string("op_3679_cast_fp16")]; tensor layers_19_input_layernorm_weight_to_fp16 = const()[name = string("layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299034432)))]; tensor var_3680_cast_fp16 = mul(x = var_3679_cast_fp16, y = layers_19_input_layernorm_weight_to_fp16)[name = string("op_3680_cast_fp16")]; tensor layers_19_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299036544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301133760))))[name = string("layers_19_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_19_self_attn_q_proj_weight_to_fp16_palettized, x = var_3680_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor var_3696 = const()[name = string("op_3696"), val = tensor([1, 1, 16, 128])]; tensor var_3697_cast_fp16 = reshape(shape = var_3696, x = linear_133_cast_fp16)[name = string("op_3697_cast_fp16")]; tensor x_499_perm_0 = const()[name = string("x_499_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_19_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301134336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302182976))))[name = string("layers_19_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_k_proj_weight_to_fp16_palettized, x = var_3680_cast_fp16)[name = string("linear_134_cast_fp16")]; tensor var_3701 = const()[name = string("op_3701"), val = tensor([1, 1, 8, 128])]; tensor var_3702_cast_fp16 = reshape(shape = var_3701, x = linear_134_cast_fp16)[name = string("op_3702_cast_fp16")]; tensor x_503_perm_0 = const()[name = string("x_503_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_19_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302183552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303232192))))[name = string("layers_19_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_v_proj_weight_to_fp16_palettized, x = var_3680_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor var_3706 = const()[name = string("op_3706"), val = tensor([1, 1, 8, 128])]; tensor var_3707_cast_fp16 = reshape(shape = var_3706, x = linear_135_cast_fp16)[name = string("op_3707_cast_fp16")]; tensor v_39_perm_0 = const()[name = string("v_39_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_3663_promoted_1_to_fp16 = const()[name = string("op_3663_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_499_cast_fp16 = transpose(perm = x_499_perm_0, x = var_3697_cast_fp16)[name = string("transpose_35")]; tensor var_3711_cast_fp16 = pow(x = x_499_cast_fp16, y = var_3663_promoted_1_to_fp16)[name = string("op_3711_cast_fp16")]; tensor var_3713_axes_0 = const()[name = string("op_3713_axes_0"), val = tensor([-1])]; bool var_3713_keep_dims_0 = const()[name = string("op_3713_keep_dims_0"), val = bool(true)]; tensor var_3713_cast_fp16 = reduce_mean(axes = var_3713_axes_0, keep_dims = var_3713_keep_dims_0, x = var_3711_cast_fp16)[name = string("op_3713_cast_fp16")]; fp16 var_3714_to_fp16 = const()[name = string("op_3714_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3715_cast_fp16 = add(x = var_3713_cast_fp16, y = var_3714_to_fp16)[name = string("op_3715_cast_fp16")]; fp32 norm_155_epsilon_0 = const()[name = string("norm_155_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_155_cast_fp16 = rsqrt(epsilon = norm_155_epsilon_0, x = var_3715_cast_fp16)[name = string("norm_155_cast_fp16")]; tensor var_3717_cast_fp16 = mul(x = x_499_cast_fp16, y = norm_155_cast_fp16)[name = string("op_3717_cast_fp16")]; tensor layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303232768)))]; tensor var_3718_cast_fp16 = mul(x = var_3717_cast_fp16, y = layers_19_self_attn_q_norm_weight_to_fp16)[name = string("op_3718_cast_fp16")]; fp16 var_3663_promoted_2_to_fp16 = const()[name = string("op_3663_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_503_cast_fp16 = transpose(perm = x_503_perm_0, x = var_3702_cast_fp16)[name = string("transpose_34")]; tensor var_3722_cast_fp16 = pow(x = x_503_cast_fp16, y = var_3663_promoted_2_to_fp16)[name = string("op_3722_cast_fp16")]; tensor var_3724_axes_0 = const()[name = string("op_3724_axes_0"), val = tensor([-1])]; bool var_3724_keep_dims_0 = const()[name = string("op_3724_keep_dims_0"), val = bool(true)]; tensor var_3724_cast_fp16 = reduce_mean(axes = var_3724_axes_0, keep_dims = var_3724_keep_dims_0, x = var_3722_cast_fp16)[name = string("op_3724_cast_fp16")]; fp16 var_3725_to_fp16 = const()[name = string("op_3725_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3726_cast_fp16 = add(x = var_3724_cast_fp16, y = var_3725_to_fp16)[name = string("op_3726_cast_fp16")]; fp32 norm_157_epsilon_0 = const()[name = string("norm_157_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_157_cast_fp16 = rsqrt(epsilon = norm_157_epsilon_0, x = var_3726_cast_fp16)[name = string("norm_157_cast_fp16")]; tensor var_3728_cast_fp16 = mul(x = x_503_cast_fp16, y = norm_157_cast_fp16)[name = string("op_3728_cast_fp16")]; tensor layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303233088)))]; tensor var_3729_cast_fp16 = mul(x = var_3728_cast_fp16, y = layers_19_self_attn_k_norm_weight_to_fp16)[name = string("op_3729_cast_fp16")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = var_3718_cast_fp16)[name = string("x1_77_cast_fp16")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = var_3718_cast_fp16)[name = string("x2_77_cast_fp16")]; tensor var_3747_cast_fp16 = mul(x = x1_77_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3747_cast_fp16")]; tensor var_3748_cast_fp16 = mul(x = x2_77_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3748_cast_fp16")]; tensor var_3749_cast_fp16 = sub(x = var_3747_cast_fp16, y = var_3748_cast_fp16)[name = string("op_3749_cast_fp16")]; tensor var_3750_cast_fp16 = mul(x = x2_77_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3750_cast_fp16")]; tensor var_3751_cast_fp16 = mul(x = x1_77_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3751_cast_fp16")]; tensor var_3752_cast_fp16 = add(x = var_3750_cast_fp16, y = var_3751_cast_fp16)[name = string("op_3752_cast_fp16")]; bool q_39_interleave_0 = const()[name = string("q_39_interleave_0"), val = bool(false)]; tensor q_39_cast_fp16 = concat(axis = var_3664, interleave = q_39_interleave_0, values = (var_3749_cast_fp16, var_3752_cast_fp16))[name = string("q_39_cast_fp16")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = var_3729_cast_fp16)[name = string("x1_79_cast_fp16")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = var_3729_cast_fp16)[name = string("x2_79_cast_fp16")]; tensor var_3771_cast_fp16 = mul(x = x1_79_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3771_cast_fp16")]; tensor var_3772_cast_fp16 = mul(x = x2_79_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3772_cast_fp16")]; tensor var_3773_cast_fp16 = sub(x = var_3771_cast_fp16, y = var_3772_cast_fp16)[name = string("op_3773_cast_fp16")]; tensor var_3774_cast_fp16 = mul(x = x2_79_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3774_cast_fp16")]; tensor var_3775_cast_fp16 = mul(x = x1_79_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3775_cast_fp16")]; tensor var_3776_cast_fp16 = add(x = var_3774_cast_fp16, y = var_3775_cast_fp16)[name = string("op_3776_cast_fp16")]; bool k_39_interleave_0 = const()[name = string("k_39_interleave_0"), val = bool(false)]; tensor k_39_cast_fp16 = concat(axis = var_3664, interleave = k_39_interleave_0, values = (var_3773_cast_fp16, var_3776_cast_fp16))[name = string("k_39_cast_fp16")]; tensor read_state_38 = read_state(input = k_cache_19)[name = string("read_state_38")]; tensor k_cache_117_cast_fp16 = mul(x = read_state_38, y = var_264_cast_fp16)[name = string("k_cache_117_cast_fp16")]; write_state(data = k_cache_117_cast_fp16, input = k_cache_19)[name = string("coreml_update_state_188_write_state")]; tensor coreml_update_state_188 = read_state(input = k_cache_19)[name = string("coreml_update_state_188")]; tensor var_3781_cast_fp16 = mul(x = k_39_cast_fp16, y = onehot_cast_fp16)[name = string("op_3781_cast_fp16")]; tensor k_cache_119_cast_fp16 = add(x = coreml_update_state_188, y = var_3781_cast_fp16)[name = string("k_cache_119_cast_fp16")]; write_state(data = k_cache_119_cast_fp16, input = k_cache_19)[name = string("coreml_update_state_189_write_state")]; tensor coreml_update_state_189 = read_state(input = k_cache_19)[name = string("coreml_update_state_189")]; tensor read_state_39 = read_state(input = v_cache_19)[name = string("read_state_39")]; tensor v_cache_117_cast_fp16 = mul(x = read_state_39, y = var_264_cast_fp16)[name = string("v_cache_117_cast_fp16")]; write_state(data = v_cache_117_cast_fp16, input = v_cache_19)[name = string("coreml_update_state_190_write_state")]; tensor coreml_update_state_190 = read_state(input = v_cache_19)[name = string("coreml_update_state_190")]; tensor v_39_cast_fp16 = transpose(perm = v_39_perm_0, x = var_3707_cast_fp16)[name = string("transpose_33")]; tensor var_3785_cast_fp16 = mul(x = v_39_cast_fp16, y = onehot_cast_fp16)[name = string("op_3785_cast_fp16")]; tensor v_cache_119_cast_fp16 = add(x = coreml_update_state_190, y = var_3785_cast_fp16)[name = string("v_cache_119_cast_fp16")]; write_state(data = v_cache_119_cast_fp16, input = v_cache_19)[name = string("coreml_update_state_191_write_state")]; tensor coreml_update_state_191 = read_state(input = v_cache_19)[name = string("coreml_update_state_191")]; tensor var_3787_axes_0 = const()[name = string("op_3787_axes_0"), val = tensor([2])]; tensor var_3787_cast_fp16 = expand_dims(axes = var_3787_axes_0, x = coreml_update_state_189)[name = string("op_3787_cast_fp16")]; tensor k_exp_77_reps_0 = const()[name = string("k_exp_77_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_77_cast_fp16 = tile(reps = k_exp_77_reps_0, x = var_3787_cast_fp16)[name = string("k_exp_77_cast_fp16")]; tensor var_3790 = const()[name = string("op_3790"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_79_cast_fp16 = reshape(shape = var_3790, x = k_exp_77_cast_fp16)[name = string("k_exp_79_cast_fp16")]; tensor var_3792_axes_0 = const()[name = string("op_3792_axes_0"), val = tensor([2])]; tensor var_3792_cast_fp16 = expand_dims(axes = var_3792_axes_0, x = coreml_update_state_191)[name = string("op_3792_cast_fp16")]; tensor v_exp_77_reps_0 = const()[name = string("v_exp_77_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_77_cast_fp16 = tile(reps = v_exp_77_reps_0, x = var_3792_cast_fp16)[name = string("v_exp_77_cast_fp16")]; tensor var_3795 = const()[name = string("op_3795"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_79_cast_fp16 = reshape(shape = var_3795, x = v_exp_77_cast_fp16)[name = string("v_exp_79_cast_fp16")]; bool var_3798_transpose_x_1 = const()[name = string("op_3798_transpose_x_1"), val = bool(false)]; bool var_3798_transpose_y_1 = const()[name = string("op_3798_transpose_y_1"), val = bool(true)]; tensor var_3798_cast_fp16 = matmul(transpose_x = var_3798_transpose_x_1, transpose_y = var_3798_transpose_y_1, x = q_39_cast_fp16, y = k_exp_79_cast_fp16)[name = string("op_3798_cast_fp16")]; fp16 var_3799_to_fp16 = const()[name = string("op_3799_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_77_cast_fp16 = mul(x = var_3798_cast_fp16, y = var_3799_to_fp16)[name = string("attn_77_cast_fp16")]; tensor input_191_cast_fp16 = add(x = attn_77_cast_fp16, y = attention_mask_to_fp16)[name = string("input_191_cast_fp16")]; tensor attn_79_cast_fp16 = softmax(axis = var_3664, x = input_191_cast_fp16)[name = string("attn_79_cast_fp16")]; bool out_39_transpose_x_0 = const()[name = string("out_39_transpose_x_0"), val = bool(false)]; bool out_39_transpose_y_0 = const()[name = string("out_39_transpose_y_0"), val = bool(false)]; tensor out_39_cast_fp16 = matmul(transpose_x = out_39_transpose_x_0, transpose_y = out_39_transpose_y_0, x = attn_79_cast_fp16, y = v_exp_79_cast_fp16)[name = string("out_39_cast_fp16")]; tensor var_3804_perm_0 = const()[name = string("op_3804_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3805 = const()[name = string("op_3805"), val = tensor([1, 1, -1])]; tensor var_3804_cast_fp16 = transpose(perm = var_3804_perm_0, x = out_39_cast_fp16)[name = string("transpose_32")]; tensor input_193_cast_fp16 = reshape(shape = var_3805, x = var_3804_cast_fp16)[name = string("input_193_cast_fp16")]; tensor layers_19_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303233408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305330624))))[name = string("layers_19_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_o_proj_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor x_513_cast_fp16 = add(x = x_493_cast_fp16, y = linear_136_cast_fp16)[name = string("x_513_cast_fp16")]; fp16 var_3663_promoted_3_to_fp16 = const()[name = string("op_3663_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3812_cast_fp16 = pow(x = x_513_cast_fp16, y = var_3663_promoted_3_to_fp16)[name = string("op_3812_cast_fp16")]; tensor var_3814_axes_0 = const()[name = string("op_3814_axes_0"), val = tensor([-1])]; bool var_3814_keep_dims_0 = const()[name = string("op_3814_keep_dims_0"), val = bool(true)]; tensor var_3814_cast_fp16 = reduce_mean(axes = var_3814_axes_0, keep_dims = var_3814_keep_dims_0, x = var_3812_cast_fp16)[name = string("op_3814_cast_fp16")]; fp16 var_3815_to_fp16 = const()[name = string("op_3815_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3816_cast_fp16 = add(x = var_3814_cast_fp16, y = var_3815_to_fp16)[name = string("op_3816_cast_fp16")]; fp32 norm_159_epsilon_0 = const()[name = string("norm_159_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_159_cast_fp16 = rsqrt(epsilon = norm_159_epsilon_0, x = var_3816_cast_fp16)[name = string("norm_159_cast_fp16")]; tensor var_3818_cast_fp16 = mul(x = x_513_cast_fp16, y = norm_159_cast_fp16)[name = string("op_3818_cast_fp16")]; tensor layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305331200)))]; tensor var_3819_cast_fp16 = mul(x = var_3818_cast_fp16, y = layers_19_post_attention_layernorm_weight_to_fp16)[name = string("op_3819_cast_fp16")]; tensor layers_19_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308479104))))[name = string("layers_19_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_gate_proj_weight_to_fp16_palettized, x = var_3819_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_3829_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_3829_cast_fp16")]; tensor layers_19_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308479680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311625472))))[name = string("layers_19_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_up_proj_weight_to_fp16_palettized, x = var_3819_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor input_199_cast_fp16 = mul(x = var_3829_cast_fp16, y = linear_138_cast_fp16)[name = string("input_199_cast_fp16")]; tensor layers_19_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311626048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314771840))))[name = string("layers_19_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_mlp_down_proj_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor x_519_cast_fp16 = add(x = x_513_cast_fp16, y = linear_139_cast_fp16)[name = string("x_519_cast_fp16")]; int32 var_3849 = const()[name = string("op_3849"), val = int32(-1)]; fp16 var_3848_promoted_to_fp16 = const()[name = string("op_3848_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3858_cast_fp16 = pow(x = x_519_cast_fp16, y = var_3848_promoted_to_fp16)[name = string("op_3858_cast_fp16")]; tensor var_3860_axes_0 = const()[name = string("op_3860_axes_0"), val = tensor([-1])]; bool var_3860_keep_dims_0 = const()[name = string("op_3860_keep_dims_0"), val = bool(true)]; tensor var_3860_cast_fp16 = reduce_mean(axes = var_3860_axes_0, keep_dims = var_3860_keep_dims_0, x = var_3858_cast_fp16)[name = string("op_3860_cast_fp16")]; fp16 var_3861_to_fp16 = const()[name = string("op_3861_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3862_cast_fp16 = add(x = var_3860_cast_fp16, y = var_3861_to_fp16)[name = string("op_3862_cast_fp16")]; fp32 norm_161_epsilon_0 = const()[name = string("norm_161_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_161_cast_fp16 = rsqrt(epsilon = norm_161_epsilon_0, x = var_3862_cast_fp16)[name = string("norm_161_cast_fp16")]; tensor var_3864_cast_fp16 = mul(x = x_519_cast_fp16, y = norm_161_cast_fp16)[name = string("op_3864_cast_fp16")]; tensor layers_20_input_layernorm_weight_to_fp16 = const()[name = string("layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314772416)))]; tensor var_3865_cast_fp16 = mul(x = var_3864_cast_fp16, y = layers_20_input_layernorm_weight_to_fp16)[name = string("op_3865_cast_fp16")]; tensor layers_20_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314774528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316871744))))[name = string("layers_20_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_20_self_attn_q_proj_weight_to_fp16_palettized, x = var_3865_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor var_3881 = const()[name = string("op_3881"), val = tensor([1, 1, 16, 128])]; tensor var_3882_cast_fp16 = reshape(shape = var_3881, x = linear_140_cast_fp16)[name = string("op_3882_cast_fp16")]; tensor x_525_perm_0 = const()[name = string("x_525_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_20_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316872320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317920960))))[name = string("layers_20_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_k_proj_weight_to_fp16_palettized, x = var_3865_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor var_3886 = const()[name = string("op_3886"), val = tensor([1, 1, 8, 128])]; tensor var_3887_cast_fp16 = reshape(shape = var_3886, x = linear_141_cast_fp16)[name = string("op_3887_cast_fp16")]; tensor x_529_perm_0 = const()[name = string("x_529_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_20_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317921536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318970176))))[name = string("layers_20_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_v_proj_weight_to_fp16_palettized, x = var_3865_cast_fp16)[name = string("linear_142_cast_fp16")]; tensor var_3891 = const()[name = string("op_3891"), val = tensor([1, 1, 8, 128])]; tensor var_3892_cast_fp16 = reshape(shape = var_3891, x = linear_142_cast_fp16)[name = string("op_3892_cast_fp16")]; tensor v_41_perm_0 = const()[name = string("v_41_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_3848_promoted_1_to_fp16 = const()[name = string("op_3848_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_525_cast_fp16 = transpose(perm = x_525_perm_0, x = var_3882_cast_fp16)[name = string("transpose_31")]; tensor var_3896_cast_fp16 = pow(x = x_525_cast_fp16, y = var_3848_promoted_1_to_fp16)[name = string("op_3896_cast_fp16")]; tensor var_3898_axes_0 = const()[name = string("op_3898_axes_0"), val = tensor([-1])]; bool var_3898_keep_dims_0 = const()[name = string("op_3898_keep_dims_0"), val = bool(true)]; tensor var_3898_cast_fp16 = reduce_mean(axes = var_3898_axes_0, keep_dims = var_3898_keep_dims_0, x = var_3896_cast_fp16)[name = string("op_3898_cast_fp16")]; fp16 var_3899_to_fp16 = const()[name = string("op_3899_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3900_cast_fp16 = add(x = var_3898_cast_fp16, y = var_3899_to_fp16)[name = string("op_3900_cast_fp16")]; fp32 norm_163_epsilon_0 = const()[name = string("norm_163_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_163_cast_fp16 = rsqrt(epsilon = norm_163_epsilon_0, x = var_3900_cast_fp16)[name = string("norm_163_cast_fp16")]; tensor var_3902_cast_fp16 = mul(x = x_525_cast_fp16, y = norm_163_cast_fp16)[name = string("op_3902_cast_fp16")]; tensor layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318970752)))]; tensor var_3903_cast_fp16 = mul(x = var_3902_cast_fp16, y = layers_20_self_attn_q_norm_weight_to_fp16)[name = string("op_3903_cast_fp16")]; fp16 var_3848_promoted_2_to_fp16 = const()[name = string("op_3848_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_529_cast_fp16 = transpose(perm = x_529_perm_0, x = var_3887_cast_fp16)[name = string("transpose_30")]; tensor var_3907_cast_fp16 = pow(x = x_529_cast_fp16, y = var_3848_promoted_2_to_fp16)[name = string("op_3907_cast_fp16")]; tensor var_3909_axes_0 = const()[name = string("op_3909_axes_0"), val = tensor([-1])]; bool var_3909_keep_dims_0 = const()[name = string("op_3909_keep_dims_0"), val = bool(true)]; tensor var_3909_cast_fp16 = reduce_mean(axes = var_3909_axes_0, keep_dims = var_3909_keep_dims_0, x = var_3907_cast_fp16)[name = string("op_3909_cast_fp16")]; fp16 var_3910_to_fp16 = const()[name = string("op_3910_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3911_cast_fp16 = add(x = var_3909_cast_fp16, y = var_3910_to_fp16)[name = string("op_3911_cast_fp16")]; fp32 norm_165_epsilon_0 = const()[name = string("norm_165_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_165_cast_fp16 = rsqrt(epsilon = norm_165_epsilon_0, x = var_3911_cast_fp16)[name = string("norm_165_cast_fp16")]; tensor var_3913_cast_fp16 = mul(x = x_529_cast_fp16, y = norm_165_cast_fp16)[name = string("op_3913_cast_fp16")]; tensor layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318971072)))]; tensor var_3914_cast_fp16 = mul(x = var_3913_cast_fp16, y = layers_20_self_attn_k_norm_weight_to_fp16)[name = string("op_3914_cast_fp16")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = var_3903_cast_fp16)[name = string("x1_81_cast_fp16")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = var_3903_cast_fp16)[name = string("x2_81_cast_fp16")]; tensor var_3932_cast_fp16 = mul(x = x1_81_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3932_cast_fp16")]; tensor var_3933_cast_fp16 = mul(x = x2_81_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3933_cast_fp16")]; tensor var_3934_cast_fp16 = sub(x = var_3932_cast_fp16, y = var_3933_cast_fp16)[name = string("op_3934_cast_fp16")]; tensor var_3935_cast_fp16 = mul(x = x2_81_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor var_3936_cast_fp16 = mul(x = x1_81_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3936_cast_fp16")]; tensor var_3937_cast_fp16 = add(x = var_3935_cast_fp16, y = var_3936_cast_fp16)[name = string("op_3937_cast_fp16")]; bool q_41_interleave_0 = const()[name = string("q_41_interleave_0"), val = bool(false)]; tensor q_41_cast_fp16 = concat(axis = var_3849, interleave = q_41_interleave_0, values = (var_3934_cast_fp16, var_3937_cast_fp16))[name = string("q_41_cast_fp16")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = var_3914_cast_fp16)[name = string("x1_83_cast_fp16")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = var_3914_cast_fp16)[name = string("x2_83_cast_fp16")]; tensor var_3956_cast_fp16 = mul(x = x1_83_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3956_cast_fp16")]; tensor var_3957_cast_fp16 = mul(x = x2_83_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3957_cast_fp16")]; tensor var_3958_cast_fp16 = sub(x = var_3956_cast_fp16, y = var_3957_cast_fp16)[name = string("op_3958_cast_fp16")]; tensor var_3959_cast_fp16 = mul(x = x2_83_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_3959_cast_fp16")]; tensor var_3960_cast_fp16 = mul(x = x1_83_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_3960_cast_fp16")]; tensor var_3961_cast_fp16 = add(x = var_3959_cast_fp16, y = var_3960_cast_fp16)[name = string("op_3961_cast_fp16")]; bool k_41_interleave_0 = const()[name = string("k_41_interleave_0"), val = bool(false)]; tensor k_41_cast_fp16 = concat(axis = var_3849, interleave = k_41_interleave_0, values = (var_3958_cast_fp16, var_3961_cast_fp16))[name = string("k_41_cast_fp16")]; tensor read_state_40 = read_state(input = k_cache_20)[name = string("read_state_40")]; tensor k_cache_123_cast_fp16 = mul(x = read_state_40, y = var_264_cast_fp16)[name = string("k_cache_123_cast_fp16")]; write_state(data = k_cache_123_cast_fp16, input = k_cache_20)[name = string("coreml_update_state_192_write_state")]; tensor coreml_update_state_192 = read_state(input = k_cache_20)[name = string("coreml_update_state_192")]; tensor var_3966_cast_fp16 = mul(x = k_41_cast_fp16, y = onehot_cast_fp16)[name = string("op_3966_cast_fp16")]; tensor k_cache_125_cast_fp16 = add(x = coreml_update_state_192, y = var_3966_cast_fp16)[name = string("k_cache_125_cast_fp16")]; write_state(data = k_cache_125_cast_fp16, input = k_cache_20)[name = string("coreml_update_state_193_write_state")]; tensor coreml_update_state_193 = read_state(input = k_cache_20)[name = string("coreml_update_state_193")]; tensor read_state_41 = read_state(input = v_cache_20)[name = string("read_state_41")]; tensor v_cache_123_cast_fp16 = mul(x = read_state_41, y = var_264_cast_fp16)[name = string("v_cache_123_cast_fp16")]; write_state(data = v_cache_123_cast_fp16, input = v_cache_20)[name = string("coreml_update_state_194_write_state")]; tensor coreml_update_state_194 = read_state(input = v_cache_20)[name = string("coreml_update_state_194")]; tensor v_41_cast_fp16 = transpose(perm = v_41_perm_0, x = var_3892_cast_fp16)[name = string("transpose_29")]; tensor var_3970_cast_fp16 = mul(x = v_41_cast_fp16, y = onehot_cast_fp16)[name = string("op_3970_cast_fp16")]; tensor v_cache_125_cast_fp16 = add(x = coreml_update_state_194, y = var_3970_cast_fp16)[name = string("v_cache_125_cast_fp16")]; write_state(data = v_cache_125_cast_fp16, input = v_cache_20)[name = string("coreml_update_state_195_write_state")]; tensor coreml_update_state_195 = read_state(input = v_cache_20)[name = string("coreml_update_state_195")]; tensor var_3972_axes_0 = const()[name = string("op_3972_axes_0"), val = tensor([2])]; tensor var_3972_cast_fp16 = expand_dims(axes = var_3972_axes_0, x = coreml_update_state_193)[name = string("op_3972_cast_fp16")]; tensor k_exp_81_reps_0 = const()[name = string("k_exp_81_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_81_cast_fp16 = tile(reps = k_exp_81_reps_0, x = var_3972_cast_fp16)[name = string("k_exp_81_cast_fp16")]; tensor var_3975 = const()[name = string("op_3975"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_83_cast_fp16 = reshape(shape = var_3975, x = k_exp_81_cast_fp16)[name = string("k_exp_83_cast_fp16")]; tensor var_3977_axes_0 = const()[name = string("op_3977_axes_0"), val = tensor([2])]; tensor var_3977_cast_fp16 = expand_dims(axes = var_3977_axes_0, x = coreml_update_state_195)[name = string("op_3977_cast_fp16")]; tensor v_exp_81_reps_0 = const()[name = string("v_exp_81_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_81_cast_fp16 = tile(reps = v_exp_81_reps_0, x = var_3977_cast_fp16)[name = string("v_exp_81_cast_fp16")]; tensor var_3980 = const()[name = string("op_3980"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_83_cast_fp16 = reshape(shape = var_3980, x = v_exp_81_cast_fp16)[name = string("v_exp_83_cast_fp16")]; bool var_3983_transpose_x_1 = const()[name = string("op_3983_transpose_x_1"), val = bool(false)]; bool var_3983_transpose_y_1 = const()[name = string("op_3983_transpose_y_1"), val = bool(true)]; tensor var_3983_cast_fp16 = matmul(transpose_x = var_3983_transpose_x_1, transpose_y = var_3983_transpose_y_1, x = q_41_cast_fp16, y = k_exp_83_cast_fp16)[name = string("op_3983_cast_fp16")]; fp16 var_3984_to_fp16 = const()[name = string("op_3984_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_81_cast_fp16 = mul(x = var_3983_cast_fp16, y = var_3984_to_fp16)[name = string("attn_81_cast_fp16")]; tensor input_201_cast_fp16 = add(x = attn_81_cast_fp16, y = attention_mask_to_fp16)[name = string("input_201_cast_fp16")]; tensor attn_83_cast_fp16 = softmax(axis = var_3849, x = input_201_cast_fp16)[name = string("attn_83_cast_fp16")]; bool out_41_transpose_x_0 = const()[name = string("out_41_transpose_x_0"), val = bool(false)]; bool out_41_transpose_y_0 = const()[name = string("out_41_transpose_y_0"), val = bool(false)]; tensor out_41_cast_fp16 = matmul(transpose_x = out_41_transpose_x_0, transpose_y = out_41_transpose_y_0, x = attn_83_cast_fp16, y = v_exp_83_cast_fp16)[name = string("out_41_cast_fp16")]; tensor var_3989_perm_0 = const()[name = string("op_3989_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3990 = const()[name = string("op_3990"), val = tensor([1, 1, -1])]; tensor var_3989_cast_fp16 = transpose(perm = var_3989_perm_0, x = out_41_cast_fp16)[name = string("transpose_28")]; tensor input_203_cast_fp16 = reshape(shape = var_3990, x = var_3989_cast_fp16)[name = string("input_203_cast_fp16")]; tensor layers_20_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318971392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321068608))))[name = string("layers_20_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_o_proj_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor x_539_cast_fp16 = add(x = x_519_cast_fp16, y = linear_143_cast_fp16)[name = string("x_539_cast_fp16")]; fp16 var_3848_promoted_3_to_fp16 = const()[name = string("op_3848_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_3997_cast_fp16 = pow(x = x_539_cast_fp16, y = var_3848_promoted_3_to_fp16)[name = string("op_3997_cast_fp16")]; tensor var_3999_axes_0 = const()[name = string("op_3999_axes_0"), val = tensor([-1])]; bool var_3999_keep_dims_0 = const()[name = string("op_3999_keep_dims_0"), val = bool(true)]; tensor var_3999_cast_fp16 = reduce_mean(axes = var_3999_axes_0, keep_dims = var_3999_keep_dims_0, x = var_3997_cast_fp16)[name = string("op_3999_cast_fp16")]; fp16 var_4000_to_fp16 = const()[name = string("op_4000_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4001_cast_fp16 = add(x = var_3999_cast_fp16, y = var_4000_to_fp16)[name = string("op_4001_cast_fp16")]; fp32 norm_167_epsilon_0 = const()[name = string("norm_167_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_167_cast_fp16 = rsqrt(epsilon = norm_167_epsilon_0, x = var_4001_cast_fp16)[name = string("norm_167_cast_fp16")]; tensor var_4003_cast_fp16 = mul(x = x_539_cast_fp16, y = norm_167_cast_fp16)[name = string("op_4003_cast_fp16")]; tensor layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321069184)))]; tensor var_4004_cast_fp16 = mul(x = var_4003_cast_fp16, y = layers_20_post_attention_layernorm_weight_to_fp16)[name = string("op_4004_cast_fp16")]; tensor layers_20_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321071296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324217088))))[name = string("layers_20_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_gate_proj_weight_to_fp16_palettized, x = var_4004_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_4014_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_4014_cast_fp16")]; tensor layers_20_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324217664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327363456))))[name = string("layers_20_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_up_proj_weight_to_fp16_palettized, x = var_4004_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor input_209_cast_fp16 = mul(x = var_4014_cast_fp16, y = linear_145_cast_fp16)[name = string("input_209_cast_fp16")]; tensor layers_20_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327364032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330509824))))[name = string("layers_20_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_146_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_mlp_down_proj_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor x_545_cast_fp16 = add(x = x_539_cast_fp16, y = linear_146_cast_fp16)[name = string("x_545_cast_fp16")]; int32 var_4034 = const()[name = string("op_4034"), val = int32(-1)]; fp16 var_4033_promoted_to_fp16 = const()[name = string("op_4033_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4043_cast_fp16 = pow(x = x_545_cast_fp16, y = var_4033_promoted_to_fp16)[name = string("op_4043_cast_fp16")]; tensor var_4045_axes_0 = const()[name = string("op_4045_axes_0"), val = tensor([-1])]; bool var_4045_keep_dims_0 = const()[name = string("op_4045_keep_dims_0"), val = bool(true)]; tensor var_4045_cast_fp16 = reduce_mean(axes = var_4045_axes_0, keep_dims = var_4045_keep_dims_0, x = var_4043_cast_fp16)[name = string("op_4045_cast_fp16")]; fp16 var_4046_to_fp16 = const()[name = string("op_4046_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4047_cast_fp16 = add(x = var_4045_cast_fp16, y = var_4046_to_fp16)[name = string("op_4047_cast_fp16")]; fp32 norm_169_epsilon_0 = const()[name = string("norm_169_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_169_cast_fp16 = rsqrt(epsilon = norm_169_epsilon_0, x = var_4047_cast_fp16)[name = string("norm_169_cast_fp16")]; tensor var_4049_cast_fp16 = mul(x = x_545_cast_fp16, y = norm_169_cast_fp16)[name = string("op_4049_cast_fp16")]; tensor layers_21_input_layernorm_weight_to_fp16 = const()[name = string("layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330510400)))]; tensor var_4050_cast_fp16 = mul(x = var_4049_cast_fp16, y = layers_21_input_layernorm_weight_to_fp16)[name = string("op_4050_cast_fp16")]; tensor layers_21_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332609728))))[name = string("layers_21_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_21_self_attn_q_proj_weight_to_fp16_palettized, x = var_4050_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor var_4066 = const()[name = string("op_4066"), val = tensor([1, 1, 16, 128])]; tensor var_4067_cast_fp16 = reshape(shape = var_4066, x = linear_147_cast_fp16)[name = string("op_4067_cast_fp16")]; tensor x_551_perm_0 = const()[name = string("x_551_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_21_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332610304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333658944))))[name = string("layers_21_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_k_proj_weight_to_fp16_palettized, x = var_4050_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor var_4071 = const()[name = string("op_4071"), val = tensor([1, 1, 8, 128])]; tensor var_4072_cast_fp16 = reshape(shape = var_4071, x = linear_148_cast_fp16)[name = string("op_4072_cast_fp16")]; tensor x_555_perm_0 = const()[name = string("x_555_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_21_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333659520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334708160))))[name = string("layers_21_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_v_proj_weight_to_fp16_palettized, x = var_4050_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor var_4076 = const()[name = string("op_4076"), val = tensor([1, 1, 8, 128])]; tensor var_4077_cast_fp16 = reshape(shape = var_4076, x = linear_149_cast_fp16)[name = string("op_4077_cast_fp16")]; tensor v_43_perm_0 = const()[name = string("v_43_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_4033_promoted_1_to_fp16 = const()[name = string("op_4033_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_551_cast_fp16 = transpose(perm = x_551_perm_0, x = var_4067_cast_fp16)[name = string("transpose_27")]; tensor var_4081_cast_fp16 = pow(x = x_551_cast_fp16, y = var_4033_promoted_1_to_fp16)[name = string("op_4081_cast_fp16")]; tensor var_4083_axes_0 = const()[name = string("op_4083_axes_0"), val = tensor([-1])]; bool var_4083_keep_dims_0 = const()[name = string("op_4083_keep_dims_0"), val = bool(true)]; tensor var_4083_cast_fp16 = reduce_mean(axes = var_4083_axes_0, keep_dims = var_4083_keep_dims_0, x = var_4081_cast_fp16)[name = string("op_4083_cast_fp16")]; fp16 var_4084_to_fp16 = const()[name = string("op_4084_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4085_cast_fp16 = add(x = var_4083_cast_fp16, y = var_4084_to_fp16)[name = string("op_4085_cast_fp16")]; fp32 norm_171_epsilon_0 = const()[name = string("norm_171_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_171_cast_fp16 = rsqrt(epsilon = norm_171_epsilon_0, x = var_4085_cast_fp16)[name = string("norm_171_cast_fp16")]; tensor var_4087_cast_fp16 = mul(x = x_551_cast_fp16, y = norm_171_cast_fp16)[name = string("op_4087_cast_fp16")]; tensor layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334708736)))]; tensor var_4088_cast_fp16 = mul(x = var_4087_cast_fp16, y = layers_21_self_attn_q_norm_weight_to_fp16)[name = string("op_4088_cast_fp16")]; fp16 var_4033_promoted_2_to_fp16 = const()[name = string("op_4033_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_555_cast_fp16 = transpose(perm = x_555_perm_0, x = var_4072_cast_fp16)[name = string("transpose_26")]; tensor var_4092_cast_fp16 = pow(x = x_555_cast_fp16, y = var_4033_promoted_2_to_fp16)[name = string("op_4092_cast_fp16")]; tensor var_4094_axes_0 = const()[name = string("op_4094_axes_0"), val = tensor([-1])]; bool var_4094_keep_dims_0 = const()[name = string("op_4094_keep_dims_0"), val = bool(true)]; tensor var_4094_cast_fp16 = reduce_mean(axes = var_4094_axes_0, keep_dims = var_4094_keep_dims_0, x = var_4092_cast_fp16)[name = string("op_4094_cast_fp16")]; fp16 var_4095_to_fp16 = const()[name = string("op_4095_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4096_cast_fp16 = add(x = var_4094_cast_fp16, y = var_4095_to_fp16)[name = string("op_4096_cast_fp16")]; fp32 norm_173_epsilon_0 = const()[name = string("norm_173_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_173_cast_fp16 = rsqrt(epsilon = norm_173_epsilon_0, x = var_4096_cast_fp16)[name = string("norm_173_cast_fp16")]; tensor var_4098_cast_fp16 = mul(x = x_555_cast_fp16, y = norm_173_cast_fp16)[name = string("op_4098_cast_fp16")]; tensor layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334709056)))]; tensor var_4099_cast_fp16 = mul(x = var_4098_cast_fp16, y = layers_21_self_attn_k_norm_weight_to_fp16)[name = string("op_4099_cast_fp16")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = var_4088_cast_fp16)[name = string("x1_85_cast_fp16")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = var_4088_cast_fp16)[name = string("x2_85_cast_fp16")]; tensor var_4117_cast_fp16 = mul(x = x1_85_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4117_cast_fp16")]; tensor var_4118_cast_fp16 = mul(x = x2_85_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4118_cast_fp16")]; tensor var_4119_cast_fp16 = sub(x = var_4117_cast_fp16, y = var_4118_cast_fp16)[name = string("op_4119_cast_fp16")]; tensor var_4120_cast_fp16 = mul(x = x2_85_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4120_cast_fp16")]; tensor var_4121_cast_fp16 = mul(x = x1_85_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4121_cast_fp16")]; tensor var_4122_cast_fp16 = add(x = var_4120_cast_fp16, y = var_4121_cast_fp16)[name = string("op_4122_cast_fp16")]; bool q_43_interleave_0 = const()[name = string("q_43_interleave_0"), val = bool(false)]; tensor q_43_cast_fp16 = concat(axis = var_4034, interleave = q_43_interleave_0, values = (var_4119_cast_fp16, var_4122_cast_fp16))[name = string("q_43_cast_fp16")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = var_4099_cast_fp16)[name = string("x1_87_cast_fp16")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = var_4099_cast_fp16)[name = string("x2_87_cast_fp16")]; tensor var_4141_cast_fp16 = mul(x = x1_87_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4141_cast_fp16")]; tensor var_4142_cast_fp16 = mul(x = x2_87_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4142_cast_fp16")]; tensor var_4143_cast_fp16 = sub(x = var_4141_cast_fp16, y = var_4142_cast_fp16)[name = string("op_4143_cast_fp16")]; tensor var_4144_cast_fp16 = mul(x = x2_87_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4144_cast_fp16")]; tensor var_4145_cast_fp16 = mul(x = x1_87_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4145_cast_fp16")]; tensor var_4146_cast_fp16 = add(x = var_4144_cast_fp16, y = var_4145_cast_fp16)[name = string("op_4146_cast_fp16")]; bool k_43_interleave_0 = const()[name = string("k_43_interleave_0"), val = bool(false)]; tensor k_43_cast_fp16 = concat(axis = var_4034, interleave = k_43_interleave_0, values = (var_4143_cast_fp16, var_4146_cast_fp16))[name = string("k_43_cast_fp16")]; tensor read_state_42 = read_state(input = k_cache_21)[name = string("read_state_42")]; tensor k_cache_129_cast_fp16 = mul(x = read_state_42, y = var_264_cast_fp16)[name = string("k_cache_129_cast_fp16")]; write_state(data = k_cache_129_cast_fp16, input = k_cache_21)[name = string("coreml_update_state_196_write_state")]; tensor coreml_update_state_196 = read_state(input = k_cache_21)[name = string("coreml_update_state_196")]; tensor var_4151_cast_fp16 = mul(x = k_43_cast_fp16, y = onehot_cast_fp16)[name = string("op_4151_cast_fp16")]; tensor k_cache_131_cast_fp16 = add(x = coreml_update_state_196, y = var_4151_cast_fp16)[name = string("k_cache_131_cast_fp16")]; write_state(data = k_cache_131_cast_fp16, input = k_cache_21)[name = string("coreml_update_state_197_write_state")]; tensor coreml_update_state_197 = read_state(input = k_cache_21)[name = string("coreml_update_state_197")]; tensor read_state_43 = read_state(input = v_cache_21)[name = string("read_state_43")]; tensor v_cache_129_cast_fp16 = mul(x = read_state_43, y = var_264_cast_fp16)[name = string("v_cache_129_cast_fp16")]; write_state(data = v_cache_129_cast_fp16, input = v_cache_21)[name = string("coreml_update_state_198_write_state")]; tensor coreml_update_state_198 = read_state(input = v_cache_21)[name = string("coreml_update_state_198")]; tensor v_43_cast_fp16 = transpose(perm = v_43_perm_0, x = var_4077_cast_fp16)[name = string("transpose_25")]; tensor var_4155_cast_fp16 = mul(x = v_43_cast_fp16, y = onehot_cast_fp16)[name = string("op_4155_cast_fp16")]; tensor v_cache_131_cast_fp16 = add(x = coreml_update_state_198, y = var_4155_cast_fp16)[name = string("v_cache_131_cast_fp16")]; write_state(data = v_cache_131_cast_fp16, input = v_cache_21)[name = string("coreml_update_state_199_write_state")]; tensor coreml_update_state_199 = read_state(input = v_cache_21)[name = string("coreml_update_state_199")]; tensor var_4157_axes_0 = const()[name = string("op_4157_axes_0"), val = tensor([2])]; tensor var_4157_cast_fp16 = expand_dims(axes = var_4157_axes_0, x = coreml_update_state_197)[name = string("op_4157_cast_fp16")]; tensor k_exp_85_reps_0 = const()[name = string("k_exp_85_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_85_cast_fp16 = tile(reps = k_exp_85_reps_0, x = var_4157_cast_fp16)[name = string("k_exp_85_cast_fp16")]; tensor var_4160 = const()[name = string("op_4160"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_87_cast_fp16 = reshape(shape = var_4160, x = k_exp_85_cast_fp16)[name = string("k_exp_87_cast_fp16")]; tensor var_4162_axes_0 = const()[name = string("op_4162_axes_0"), val = tensor([2])]; tensor var_4162_cast_fp16 = expand_dims(axes = var_4162_axes_0, x = coreml_update_state_199)[name = string("op_4162_cast_fp16")]; tensor v_exp_85_reps_0 = const()[name = string("v_exp_85_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_85_cast_fp16 = tile(reps = v_exp_85_reps_0, x = var_4162_cast_fp16)[name = string("v_exp_85_cast_fp16")]; tensor var_4165 = const()[name = string("op_4165"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_87_cast_fp16 = reshape(shape = var_4165, x = v_exp_85_cast_fp16)[name = string("v_exp_87_cast_fp16")]; bool var_4168_transpose_x_1 = const()[name = string("op_4168_transpose_x_1"), val = bool(false)]; bool var_4168_transpose_y_1 = const()[name = string("op_4168_transpose_y_1"), val = bool(true)]; tensor var_4168_cast_fp16 = matmul(transpose_x = var_4168_transpose_x_1, transpose_y = var_4168_transpose_y_1, x = q_43_cast_fp16, y = k_exp_87_cast_fp16)[name = string("op_4168_cast_fp16")]; fp16 var_4169_to_fp16 = const()[name = string("op_4169_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_85_cast_fp16 = mul(x = var_4168_cast_fp16, y = var_4169_to_fp16)[name = string("attn_85_cast_fp16")]; tensor input_211_cast_fp16 = add(x = attn_85_cast_fp16, y = attention_mask_to_fp16)[name = string("input_211_cast_fp16")]; tensor attn_87_cast_fp16 = softmax(axis = var_4034, x = input_211_cast_fp16)[name = string("attn_87_cast_fp16")]; bool out_43_transpose_x_0 = const()[name = string("out_43_transpose_x_0"), val = bool(false)]; bool out_43_transpose_y_0 = const()[name = string("out_43_transpose_y_0"), val = bool(false)]; tensor out_43_cast_fp16 = matmul(transpose_x = out_43_transpose_x_0, transpose_y = out_43_transpose_y_0, x = attn_87_cast_fp16, y = v_exp_87_cast_fp16)[name = string("out_43_cast_fp16")]; tensor var_4174_perm_0 = const()[name = string("op_4174_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4175 = const()[name = string("op_4175"), val = tensor([1, 1, -1])]; tensor var_4174_cast_fp16 = transpose(perm = var_4174_perm_0, x = out_43_cast_fp16)[name = string("transpose_24")]; tensor input_213_cast_fp16 = reshape(shape = var_4175, x = var_4174_cast_fp16)[name = string("input_213_cast_fp16")]; tensor layers_21_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334709376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336806592))))[name = string("layers_21_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_150_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_o_proj_weight_to_fp16_palettized, x = input_213_cast_fp16)[name = string("linear_150_cast_fp16")]; tensor x_565_cast_fp16 = add(x = x_545_cast_fp16, y = linear_150_cast_fp16)[name = string("x_565_cast_fp16")]; fp16 var_4033_promoted_3_to_fp16 = const()[name = string("op_4033_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4182_cast_fp16 = pow(x = x_565_cast_fp16, y = var_4033_promoted_3_to_fp16)[name = string("op_4182_cast_fp16")]; tensor var_4184_axes_0 = const()[name = string("op_4184_axes_0"), val = tensor([-1])]; bool var_4184_keep_dims_0 = const()[name = string("op_4184_keep_dims_0"), val = bool(true)]; tensor var_4184_cast_fp16 = reduce_mean(axes = var_4184_axes_0, keep_dims = var_4184_keep_dims_0, x = var_4182_cast_fp16)[name = string("op_4184_cast_fp16")]; fp16 var_4185_to_fp16 = const()[name = string("op_4185_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4186_cast_fp16 = add(x = var_4184_cast_fp16, y = var_4185_to_fp16)[name = string("op_4186_cast_fp16")]; fp32 norm_175_epsilon_0 = const()[name = string("norm_175_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_175_cast_fp16 = rsqrt(epsilon = norm_175_epsilon_0, x = var_4186_cast_fp16)[name = string("norm_175_cast_fp16")]; tensor var_4188_cast_fp16 = mul(x = x_565_cast_fp16, y = norm_175_cast_fp16)[name = string("op_4188_cast_fp16")]; tensor layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336807168)))]; tensor var_4189_cast_fp16 = mul(x = var_4188_cast_fp16, y = layers_21_post_attention_layernorm_weight_to_fp16)[name = string("op_4189_cast_fp16")]; tensor layers_21_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336809280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339955072))))[name = string("layers_21_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_gate_proj_weight_to_fp16_palettized, x = var_4189_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor var_4199_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_4199_cast_fp16")]; tensor layers_21_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339955648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343101440))))[name = string("layers_21_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_up_proj_weight_to_fp16_palettized, x = var_4189_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor input_219_cast_fp16 = mul(x = var_4199_cast_fp16, y = linear_152_cast_fp16)[name = string("input_219_cast_fp16")]; tensor layers_21_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343102016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346247808))))[name = string("layers_21_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_mlp_down_proj_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor x_571_cast_fp16 = add(x = x_565_cast_fp16, y = linear_153_cast_fp16)[name = string("x_571_cast_fp16")]; int32 var_4219 = const()[name = string("op_4219"), val = int32(-1)]; fp16 var_4218_promoted_to_fp16 = const()[name = string("op_4218_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4228_cast_fp16 = pow(x = x_571_cast_fp16, y = var_4218_promoted_to_fp16)[name = string("op_4228_cast_fp16")]; tensor var_4230_axes_0 = const()[name = string("op_4230_axes_0"), val = tensor([-1])]; bool var_4230_keep_dims_0 = const()[name = string("op_4230_keep_dims_0"), val = bool(true)]; tensor var_4230_cast_fp16 = reduce_mean(axes = var_4230_axes_0, keep_dims = var_4230_keep_dims_0, x = var_4228_cast_fp16)[name = string("op_4230_cast_fp16")]; fp16 var_4231_to_fp16 = const()[name = string("op_4231_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4232_cast_fp16 = add(x = var_4230_cast_fp16, y = var_4231_to_fp16)[name = string("op_4232_cast_fp16")]; fp32 norm_177_epsilon_0 = const()[name = string("norm_177_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_177_cast_fp16 = rsqrt(epsilon = norm_177_epsilon_0, x = var_4232_cast_fp16)[name = string("norm_177_cast_fp16")]; tensor var_4234_cast_fp16 = mul(x = x_571_cast_fp16, y = norm_177_cast_fp16)[name = string("op_4234_cast_fp16")]; tensor layers_22_input_layernorm_weight_to_fp16 = const()[name = string("layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346248384)))]; tensor var_4235_cast_fp16 = mul(x = var_4234_cast_fp16, y = layers_22_input_layernorm_weight_to_fp16)[name = string("op_4235_cast_fp16")]; tensor layers_22_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346250496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348347712))))[name = string("layers_22_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_22_self_attn_q_proj_weight_to_fp16_palettized, x = var_4235_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor var_4251 = const()[name = string("op_4251"), val = tensor([1, 1, 16, 128])]; tensor var_4252_cast_fp16 = reshape(shape = var_4251, x = linear_154_cast_fp16)[name = string("op_4252_cast_fp16")]; tensor x_577_perm_0 = const()[name = string("x_577_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_22_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348348288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349396928))))[name = string("layers_22_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_k_proj_weight_to_fp16_palettized, x = var_4235_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor var_4256 = const()[name = string("op_4256"), val = tensor([1, 1, 8, 128])]; tensor var_4257_cast_fp16 = reshape(shape = var_4256, x = linear_155_cast_fp16)[name = string("op_4257_cast_fp16")]; tensor x_581_perm_0 = const()[name = string("x_581_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_22_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349397504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350446144))))[name = string("layers_22_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_v_proj_weight_to_fp16_palettized, x = var_4235_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor var_4261 = const()[name = string("op_4261"), val = tensor([1, 1, 8, 128])]; tensor var_4262_cast_fp16 = reshape(shape = var_4261, x = linear_156_cast_fp16)[name = string("op_4262_cast_fp16")]; tensor v_45_perm_0 = const()[name = string("v_45_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_4218_promoted_1_to_fp16 = const()[name = string("op_4218_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_577_cast_fp16 = transpose(perm = x_577_perm_0, x = var_4252_cast_fp16)[name = string("transpose_23")]; tensor var_4266_cast_fp16 = pow(x = x_577_cast_fp16, y = var_4218_promoted_1_to_fp16)[name = string("op_4266_cast_fp16")]; tensor var_4268_axes_0 = const()[name = string("op_4268_axes_0"), val = tensor([-1])]; bool var_4268_keep_dims_0 = const()[name = string("op_4268_keep_dims_0"), val = bool(true)]; tensor var_4268_cast_fp16 = reduce_mean(axes = var_4268_axes_0, keep_dims = var_4268_keep_dims_0, x = var_4266_cast_fp16)[name = string("op_4268_cast_fp16")]; fp16 var_4269_to_fp16 = const()[name = string("op_4269_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4270_cast_fp16 = add(x = var_4268_cast_fp16, y = var_4269_to_fp16)[name = string("op_4270_cast_fp16")]; fp32 norm_179_epsilon_0 = const()[name = string("norm_179_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_179_cast_fp16 = rsqrt(epsilon = norm_179_epsilon_0, x = var_4270_cast_fp16)[name = string("norm_179_cast_fp16")]; tensor var_4272_cast_fp16 = mul(x = x_577_cast_fp16, y = norm_179_cast_fp16)[name = string("op_4272_cast_fp16")]; tensor layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350446720)))]; tensor var_4273_cast_fp16 = mul(x = var_4272_cast_fp16, y = layers_22_self_attn_q_norm_weight_to_fp16)[name = string("op_4273_cast_fp16")]; fp16 var_4218_promoted_2_to_fp16 = const()[name = string("op_4218_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_581_cast_fp16 = transpose(perm = x_581_perm_0, x = var_4257_cast_fp16)[name = string("transpose_22")]; tensor var_4277_cast_fp16 = pow(x = x_581_cast_fp16, y = var_4218_promoted_2_to_fp16)[name = string("op_4277_cast_fp16")]; tensor var_4279_axes_0 = const()[name = string("op_4279_axes_0"), val = tensor([-1])]; bool var_4279_keep_dims_0 = const()[name = string("op_4279_keep_dims_0"), val = bool(true)]; tensor var_4279_cast_fp16 = reduce_mean(axes = var_4279_axes_0, keep_dims = var_4279_keep_dims_0, x = var_4277_cast_fp16)[name = string("op_4279_cast_fp16")]; fp16 var_4280_to_fp16 = const()[name = string("op_4280_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4281_cast_fp16 = add(x = var_4279_cast_fp16, y = var_4280_to_fp16)[name = string("op_4281_cast_fp16")]; fp32 norm_181_epsilon_0 = const()[name = string("norm_181_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_181_cast_fp16 = rsqrt(epsilon = norm_181_epsilon_0, x = var_4281_cast_fp16)[name = string("norm_181_cast_fp16")]; tensor var_4283_cast_fp16 = mul(x = x_581_cast_fp16, y = norm_181_cast_fp16)[name = string("op_4283_cast_fp16")]; tensor layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350447040)))]; tensor var_4284_cast_fp16 = mul(x = var_4283_cast_fp16, y = layers_22_self_attn_k_norm_weight_to_fp16)[name = string("op_4284_cast_fp16")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = var_4273_cast_fp16)[name = string("x1_89_cast_fp16")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = var_4273_cast_fp16)[name = string("x2_89_cast_fp16")]; tensor var_4302_cast_fp16 = mul(x = x1_89_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4302_cast_fp16")]; tensor var_4303_cast_fp16 = mul(x = x2_89_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4303_cast_fp16")]; tensor var_4304_cast_fp16 = sub(x = var_4302_cast_fp16, y = var_4303_cast_fp16)[name = string("op_4304_cast_fp16")]; tensor var_4305_cast_fp16 = mul(x = x2_89_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4305_cast_fp16")]; tensor var_4306_cast_fp16 = mul(x = x1_89_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4306_cast_fp16")]; tensor var_4307_cast_fp16 = add(x = var_4305_cast_fp16, y = var_4306_cast_fp16)[name = string("op_4307_cast_fp16")]; bool q_45_interleave_0 = const()[name = string("q_45_interleave_0"), val = bool(false)]; tensor q_45_cast_fp16 = concat(axis = var_4219, interleave = q_45_interleave_0, values = (var_4304_cast_fp16, var_4307_cast_fp16))[name = string("q_45_cast_fp16")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = var_4284_cast_fp16)[name = string("x1_91_cast_fp16")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = var_4284_cast_fp16)[name = string("x2_91_cast_fp16")]; tensor var_4326_cast_fp16 = mul(x = x1_91_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4326_cast_fp16")]; tensor var_4327_cast_fp16 = mul(x = x2_91_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4327_cast_fp16")]; tensor var_4328_cast_fp16 = sub(x = var_4326_cast_fp16, y = var_4327_cast_fp16)[name = string("op_4328_cast_fp16")]; tensor var_4329_cast_fp16 = mul(x = x2_91_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4329_cast_fp16")]; tensor var_4330_cast_fp16 = mul(x = x1_91_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4330_cast_fp16")]; tensor var_4331_cast_fp16 = add(x = var_4329_cast_fp16, y = var_4330_cast_fp16)[name = string("op_4331_cast_fp16")]; bool k_45_interleave_0 = const()[name = string("k_45_interleave_0"), val = bool(false)]; tensor k_45_cast_fp16 = concat(axis = var_4219, interleave = k_45_interleave_0, values = (var_4328_cast_fp16, var_4331_cast_fp16))[name = string("k_45_cast_fp16")]; tensor read_state_44 = read_state(input = k_cache_22)[name = string("read_state_44")]; tensor k_cache_135_cast_fp16 = mul(x = read_state_44, y = var_264_cast_fp16)[name = string("k_cache_135_cast_fp16")]; write_state(data = k_cache_135_cast_fp16, input = k_cache_22)[name = string("coreml_update_state_200_write_state")]; tensor coreml_update_state_200 = read_state(input = k_cache_22)[name = string("coreml_update_state_200")]; tensor var_4336_cast_fp16 = mul(x = k_45_cast_fp16, y = onehot_cast_fp16)[name = string("op_4336_cast_fp16")]; tensor k_cache_137_cast_fp16 = add(x = coreml_update_state_200, y = var_4336_cast_fp16)[name = string("k_cache_137_cast_fp16")]; write_state(data = k_cache_137_cast_fp16, input = k_cache_22)[name = string("coreml_update_state_201_write_state")]; tensor coreml_update_state_201 = read_state(input = k_cache_22)[name = string("coreml_update_state_201")]; tensor read_state_45 = read_state(input = v_cache_22)[name = string("read_state_45")]; tensor v_cache_135_cast_fp16 = mul(x = read_state_45, y = var_264_cast_fp16)[name = string("v_cache_135_cast_fp16")]; write_state(data = v_cache_135_cast_fp16, input = v_cache_22)[name = string("coreml_update_state_202_write_state")]; tensor coreml_update_state_202 = read_state(input = v_cache_22)[name = string("coreml_update_state_202")]; tensor v_45_cast_fp16 = transpose(perm = v_45_perm_0, x = var_4262_cast_fp16)[name = string("transpose_21")]; tensor var_4340_cast_fp16 = mul(x = v_45_cast_fp16, y = onehot_cast_fp16)[name = string("op_4340_cast_fp16")]; tensor v_cache_137_cast_fp16 = add(x = coreml_update_state_202, y = var_4340_cast_fp16)[name = string("v_cache_137_cast_fp16")]; write_state(data = v_cache_137_cast_fp16, input = v_cache_22)[name = string("coreml_update_state_203_write_state")]; tensor coreml_update_state_203 = read_state(input = v_cache_22)[name = string("coreml_update_state_203")]; tensor var_4342_axes_0 = const()[name = string("op_4342_axes_0"), val = tensor([2])]; tensor var_4342_cast_fp16 = expand_dims(axes = var_4342_axes_0, x = coreml_update_state_201)[name = string("op_4342_cast_fp16")]; tensor k_exp_89_reps_0 = const()[name = string("k_exp_89_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_89_cast_fp16 = tile(reps = k_exp_89_reps_0, x = var_4342_cast_fp16)[name = string("k_exp_89_cast_fp16")]; tensor var_4345 = const()[name = string("op_4345"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_91_cast_fp16 = reshape(shape = var_4345, x = k_exp_89_cast_fp16)[name = string("k_exp_91_cast_fp16")]; tensor var_4347_axes_0 = const()[name = string("op_4347_axes_0"), val = tensor([2])]; tensor var_4347_cast_fp16 = expand_dims(axes = var_4347_axes_0, x = coreml_update_state_203)[name = string("op_4347_cast_fp16")]; tensor v_exp_89_reps_0 = const()[name = string("v_exp_89_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_89_cast_fp16 = tile(reps = v_exp_89_reps_0, x = var_4347_cast_fp16)[name = string("v_exp_89_cast_fp16")]; tensor var_4350 = const()[name = string("op_4350"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_91_cast_fp16 = reshape(shape = var_4350, x = v_exp_89_cast_fp16)[name = string("v_exp_91_cast_fp16")]; bool var_4353_transpose_x_1 = const()[name = string("op_4353_transpose_x_1"), val = bool(false)]; bool var_4353_transpose_y_1 = const()[name = string("op_4353_transpose_y_1"), val = bool(true)]; tensor var_4353_cast_fp16 = matmul(transpose_x = var_4353_transpose_x_1, transpose_y = var_4353_transpose_y_1, x = q_45_cast_fp16, y = k_exp_91_cast_fp16)[name = string("op_4353_cast_fp16")]; fp16 var_4354_to_fp16 = const()[name = string("op_4354_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_89_cast_fp16 = mul(x = var_4353_cast_fp16, y = var_4354_to_fp16)[name = string("attn_89_cast_fp16")]; tensor input_221_cast_fp16 = add(x = attn_89_cast_fp16, y = attention_mask_to_fp16)[name = string("input_221_cast_fp16")]; tensor attn_91_cast_fp16 = softmax(axis = var_4219, x = input_221_cast_fp16)[name = string("attn_91_cast_fp16")]; bool out_45_transpose_x_0 = const()[name = string("out_45_transpose_x_0"), val = bool(false)]; bool out_45_transpose_y_0 = const()[name = string("out_45_transpose_y_0"), val = bool(false)]; tensor out_45_cast_fp16 = matmul(transpose_x = out_45_transpose_x_0, transpose_y = out_45_transpose_y_0, x = attn_91_cast_fp16, y = v_exp_91_cast_fp16)[name = string("out_45_cast_fp16")]; tensor var_4359_perm_0 = const()[name = string("op_4359_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4360 = const()[name = string("op_4360"), val = tensor([1, 1, -1])]; tensor var_4359_cast_fp16 = transpose(perm = var_4359_perm_0, x = out_45_cast_fp16)[name = string("transpose_20")]; tensor input_223_cast_fp16 = reshape(shape = var_4360, x = var_4359_cast_fp16)[name = string("input_223_cast_fp16")]; tensor layers_22_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350447360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352544576))))[name = string("layers_22_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_o_proj_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor x_591_cast_fp16 = add(x = x_571_cast_fp16, y = linear_157_cast_fp16)[name = string("x_591_cast_fp16")]; fp16 var_4218_promoted_3_to_fp16 = const()[name = string("op_4218_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4367_cast_fp16 = pow(x = x_591_cast_fp16, y = var_4218_promoted_3_to_fp16)[name = string("op_4367_cast_fp16")]; tensor var_4369_axes_0 = const()[name = string("op_4369_axes_0"), val = tensor([-1])]; bool var_4369_keep_dims_0 = const()[name = string("op_4369_keep_dims_0"), val = bool(true)]; tensor var_4369_cast_fp16 = reduce_mean(axes = var_4369_axes_0, keep_dims = var_4369_keep_dims_0, x = var_4367_cast_fp16)[name = string("op_4369_cast_fp16")]; fp16 var_4370_to_fp16 = const()[name = string("op_4370_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4371_cast_fp16 = add(x = var_4369_cast_fp16, y = var_4370_to_fp16)[name = string("op_4371_cast_fp16")]; fp32 norm_183_epsilon_0 = const()[name = string("norm_183_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_183_cast_fp16 = rsqrt(epsilon = norm_183_epsilon_0, x = var_4371_cast_fp16)[name = string("norm_183_cast_fp16")]; tensor var_4373_cast_fp16 = mul(x = x_591_cast_fp16, y = norm_183_cast_fp16)[name = string("op_4373_cast_fp16")]; tensor layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352545152)))]; tensor var_4374_cast_fp16 = mul(x = var_4373_cast_fp16, y = layers_22_post_attention_layernorm_weight_to_fp16)[name = string("op_4374_cast_fp16")]; tensor layers_22_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352547264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355693056))))[name = string("layers_22_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_gate_proj_weight_to_fp16_palettized, x = var_4374_cast_fp16)[name = string("linear_158_cast_fp16")]; tensor var_4384_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_4384_cast_fp16")]; tensor layers_22_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355693632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358839424))))[name = string("layers_22_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_up_proj_weight_to_fp16_palettized, x = var_4374_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor input_229_cast_fp16 = mul(x = var_4384_cast_fp16, y = linear_159_cast_fp16)[name = string("input_229_cast_fp16")]; tensor layers_22_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358840000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361985792))))[name = string("layers_22_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_160_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_mlp_down_proj_weight_to_fp16_palettized, x = input_229_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor x_597_cast_fp16 = add(x = x_591_cast_fp16, y = linear_160_cast_fp16)[name = string("x_597_cast_fp16")]; int32 var_4404 = const()[name = string("op_4404"), val = int32(-1)]; fp16 var_4403_promoted_to_fp16 = const()[name = string("op_4403_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4413_cast_fp16 = pow(x = x_597_cast_fp16, y = var_4403_promoted_to_fp16)[name = string("op_4413_cast_fp16")]; tensor var_4415_axes_0 = const()[name = string("op_4415_axes_0"), val = tensor([-1])]; bool var_4415_keep_dims_0 = const()[name = string("op_4415_keep_dims_0"), val = bool(true)]; tensor var_4415_cast_fp16 = reduce_mean(axes = var_4415_axes_0, keep_dims = var_4415_keep_dims_0, x = var_4413_cast_fp16)[name = string("op_4415_cast_fp16")]; fp16 var_4416_to_fp16 = const()[name = string("op_4416_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4417_cast_fp16 = add(x = var_4415_cast_fp16, y = var_4416_to_fp16)[name = string("op_4417_cast_fp16")]; fp32 norm_185_epsilon_0 = const()[name = string("norm_185_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_185_cast_fp16 = rsqrt(epsilon = norm_185_epsilon_0, x = var_4417_cast_fp16)[name = string("norm_185_cast_fp16")]; tensor var_4419_cast_fp16 = mul(x = x_597_cast_fp16, y = norm_185_cast_fp16)[name = string("op_4419_cast_fp16")]; tensor layers_23_input_layernorm_weight_to_fp16 = const()[name = string("layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361986368)))]; tensor var_4420_cast_fp16 = mul(x = var_4419_cast_fp16, y = layers_23_input_layernorm_weight_to_fp16)[name = string("op_4420_cast_fp16")]; tensor layers_23_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361988480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364085696))))[name = string("layers_23_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_23_self_attn_q_proj_weight_to_fp16_palettized, x = var_4420_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor var_4436 = const()[name = string("op_4436"), val = tensor([1, 1, 16, 128])]; tensor var_4437_cast_fp16 = reshape(shape = var_4436, x = linear_161_cast_fp16)[name = string("op_4437_cast_fp16")]; tensor x_603_perm_0 = const()[name = string("x_603_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_23_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364086272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365134912))))[name = string("layers_23_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_k_proj_weight_to_fp16_palettized, x = var_4420_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor var_4441 = const()[name = string("op_4441"), val = tensor([1, 1, 8, 128])]; tensor var_4442_cast_fp16 = reshape(shape = var_4441, x = linear_162_cast_fp16)[name = string("op_4442_cast_fp16")]; tensor x_607_perm_0 = const()[name = string("x_607_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_23_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365135488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366184128))))[name = string("layers_23_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_v_proj_weight_to_fp16_palettized, x = var_4420_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor var_4446 = const()[name = string("op_4446"), val = tensor([1, 1, 8, 128])]; tensor var_4447_cast_fp16 = reshape(shape = var_4446, x = linear_163_cast_fp16)[name = string("op_4447_cast_fp16")]; tensor v_47_perm_0 = const()[name = string("v_47_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_4403_promoted_1_to_fp16 = const()[name = string("op_4403_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_603_cast_fp16 = transpose(perm = x_603_perm_0, x = var_4437_cast_fp16)[name = string("transpose_19")]; tensor var_4451_cast_fp16 = pow(x = x_603_cast_fp16, y = var_4403_promoted_1_to_fp16)[name = string("op_4451_cast_fp16")]; tensor var_4453_axes_0 = const()[name = string("op_4453_axes_0"), val = tensor([-1])]; bool var_4453_keep_dims_0 = const()[name = string("op_4453_keep_dims_0"), val = bool(true)]; tensor var_4453_cast_fp16 = reduce_mean(axes = var_4453_axes_0, keep_dims = var_4453_keep_dims_0, x = var_4451_cast_fp16)[name = string("op_4453_cast_fp16")]; fp16 var_4454_to_fp16 = const()[name = string("op_4454_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4455_cast_fp16 = add(x = var_4453_cast_fp16, y = var_4454_to_fp16)[name = string("op_4455_cast_fp16")]; fp32 norm_187_epsilon_0 = const()[name = string("norm_187_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_187_cast_fp16 = rsqrt(epsilon = norm_187_epsilon_0, x = var_4455_cast_fp16)[name = string("norm_187_cast_fp16")]; tensor var_4457_cast_fp16 = mul(x = x_603_cast_fp16, y = norm_187_cast_fp16)[name = string("op_4457_cast_fp16")]; tensor layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366184704)))]; tensor var_4458_cast_fp16 = mul(x = var_4457_cast_fp16, y = layers_23_self_attn_q_norm_weight_to_fp16)[name = string("op_4458_cast_fp16")]; fp16 var_4403_promoted_2_to_fp16 = const()[name = string("op_4403_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_607_cast_fp16 = transpose(perm = x_607_perm_0, x = var_4442_cast_fp16)[name = string("transpose_18")]; tensor var_4462_cast_fp16 = pow(x = x_607_cast_fp16, y = var_4403_promoted_2_to_fp16)[name = string("op_4462_cast_fp16")]; tensor var_4464_axes_0 = const()[name = string("op_4464_axes_0"), val = tensor([-1])]; bool var_4464_keep_dims_0 = const()[name = string("op_4464_keep_dims_0"), val = bool(true)]; tensor var_4464_cast_fp16 = reduce_mean(axes = var_4464_axes_0, keep_dims = var_4464_keep_dims_0, x = var_4462_cast_fp16)[name = string("op_4464_cast_fp16")]; fp16 var_4465_to_fp16 = const()[name = string("op_4465_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4466_cast_fp16 = add(x = var_4464_cast_fp16, y = var_4465_to_fp16)[name = string("op_4466_cast_fp16")]; fp32 norm_189_epsilon_0 = const()[name = string("norm_189_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_189_cast_fp16 = rsqrt(epsilon = norm_189_epsilon_0, x = var_4466_cast_fp16)[name = string("norm_189_cast_fp16")]; tensor var_4468_cast_fp16 = mul(x = x_607_cast_fp16, y = norm_189_cast_fp16)[name = string("op_4468_cast_fp16")]; tensor layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366185024)))]; tensor var_4469_cast_fp16 = mul(x = var_4468_cast_fp16, y = layers_23_self_attn_k_norm_weight_to_fp16)[name = string("op_4469_cast_fp16")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = var_4458_cast_fp16)[name = string("x1_93_cast_fp16")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = var_4458_cast_fp16)[name = string("x2_93_cast_fp16")]; tensor var_4487_cast_fp16 = mul(x = x1_93_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4487_cast_fp16")]; tensor var_4488_cast_fp16 = mul(x = x2_93_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4488_cast_fp16")]; tensor var_4489_cast_fp16 = sub(x = var_4487_cast_fp16, y = var_4488_cast_fp16)[name = string("op_4489_cast_fp16")]; tensor var_4490_cast_fp16 = mul(x = x2_93_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4490_cast_fp16")]; tensor var_4491_cast_fp16 = mul(x = x1_93_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4491_cast_fp16")]; tensor var_4492_cast_fp16 = add(x = var_4490_cast_fp16, y = var_4491_cast_fp16)[name = string("op_4492_cast_fp16")]; bool q_47_interleave_0 = const()[name = string("q_47_interleave_0"), val = bool(false)]; tensor q_47_cast_fp16 = concat(axis = var_4404, interleave = q_47_interleave_0, values = (var_4489_cast_fp16, var_4492_cast_fp16))[name = string("q_47_cast_fp16")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = var_4469_cast_fp16)[name = string("x1_95_cast_fp16")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = var_4469_cast_fp16)[name = string("x2_95_cast_fp16")]; tensor var_4511_cast_fp16 = mul(x = x1_95_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4511_cast_fp16")]; tensor var_4512_cast_fp16 = mul(x = x2_95_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4512_cast_fp16")]; tensor var_4513_cast_fp16 = sub(x = var_4511_cast_fp16, y = var_4512_cast_fp16)[name = string("op_4513_cast_fp16")]; tensor var_4514_cast_fp16 = mul(x = x2_95_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4514_cast_fp16")]; tensor var_4515_cast_fp16 = mul(x = x1_95_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4515_cast_fp16")]; tensor var_4516_cast_fp16 = add(x = var_4514_cast_fp16, y = var_4515_cast_fp16)[name = string("op_4516_cast_fp16")]; bool k_47_interleave_0 = const()[name = string("k_47_interleave_0"), val = bool(false)]; tensor k_47_cast_fp16 = concat(axis = var_4404, interleave = k_47_interleave_0, values = (var_4513_cast_fp16, var_4516_cast_fp16))[name = string("k_47_cast_fp16")]; tensor read_state_46 = read_state(input = k_cache_23)[name = string("read_state_46")]; tensor k_cache_141_cast_fp16 = mul(x = read_state_46, y = var_264_cast_fp16)[name = string("k_cache_141_cast_fp16")]; write_state(data = k_cache_141_cast_fp16, input = k_cache_23)[name = string("coreml_update_state_204_write_state")]; tensor coreml_update_state_204 = read_state(input = k_cache_23)[name = string("coreml_update_state_204")]; tensor var_4521_cast_fp16 = mul(x = k_47_cast_fp16, y = onehot_cast_fp16)[name = string("op_4521_cast_fp16")]; tensor k_cache_143_cast_fp16 = add(x = coreml_update_state_204, y = var_4521_cast_fp16)[name = string("k_cache_143_cast_fp16")]; write_state(data = k_cache_143_cast_fp16, input = k_cache_23)[name = string("coreml_update_state_205_write_state")]; tensor coreml_update_state_205 = read_state(input = k_cache_23)[name = string("coreml_update_state_205")]; tensor read_state_47 = read_state(input = v_cache_23)[name = string("read_state_47")]; tensor v_cache_141_cast_fp16 = mul(x = read_state_47, y = var_264_cast_fp16)[name = string("v_cache_141_cast_fp16")]; write_state(data = v_cache_141_cast_fp16, input = v_cache_23)[name = string("coreml_update_state_206_write_state")]; tensor coreml_update_state_206 = read_state(input = v_cache_23)[name = string("coreml_update_state_206")]; tensor v_47_cast_fp16 = transpose(perm = v_47_perm_0, x = var_4447_cast_fp16)[name = string("transpose_17")]; tensor var_4525_cast_fp16 = mul(x = v_47_cast_fp16, y = onehot_cast_fp16)[name = string("op_4525_cast_fp16")]; tensor v_cache_143_cast_fp16 = add(x = coreml_update_state_206, y = var_4525_cast_fp16)[name = string("v_cache_143_cast_fp16")]; write_state(data = v_cache_143_cast_fp16, input = v_cache_23)[name = string("coreml_update_state_207_write_state")]; tensor coreml_update_state_207 = read_state(input = v_cache_23)[name = string("coreml_update_state_207")]; tensor var_4527_axes_0 = const()[name = string("op_4527_axes_0"), val = tensor([2])]; tensor var_4527_cast_fp16 = expand_dims(axes = var_4527_axes_0, x = coreml_update_state_205)[name = string("op_4527_cast_fp16")]; tensor k_exp_93_reps_0 = const()[name = string("k_exp_93_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_93_cast_fp16 = tile(reps = k_exp_93_reps_0, x = var_4527_cast_fp16)[name = string("k_exp_93_cast_fp16")]; tensor var_4530 = const()[name = string("op_4530"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_95_cast_fp16 = reshape(shape = var_4530, x = k_exp_93_cast_fp16)[name = string("k_exp_95_cast_fp16")]; tensor var_4532_axes_0 = const()[name = string("op_4532_axes_0"), val = tensor([2])]; tensor var_4532_cast_fp16 = expand_dims(axes = var_4532_axes_0, x = coreml_update_state_207)[name = string("op_4532_cast_fp16")]; tensor v_exp_93_reps_0 = const()[name = string("v_exp_93_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_93_cast_fp16 = tile(reps = v_exp_93_reps_0, x = var_4532_cast_fp16)[name = string("v_exp_93_cast_fp16")]; tensor var_4535 = const()[name = string("op_4535"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_95_cast_fp16 = reshape(shape = var_4535, x = v_exp_93_cast_fp16)[name = string("v_exp_95_cast_fp16")]; bool var_4538_transpose_x_1 = const()[name = string("op_4538_transpose_x_1"), val = bool(false)]; bool var_4538_transpose_y_1 = const()[name = string("op_4538_transpose_y_1"), val = bool(true)]; tensor var_4538_cast_fp16 = matmul(transpose_x = var_4538_transpose_x_1, transpose_y = var_4538_transpose_y_1, x = q_47_cast_fp16, y = k_exp_95_cast_fp16)[name = string("op_4538_cast_fp16")]; fp16 var_4539_to_fp16 = const()[name = string("op_4539_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_93_cast_fp16 = mul(x = var_4538_cast_fp16, y = var_4539_to_fp16)[name = string("attn_93_cast_fp16")]; tensor input_231_cast_fp16 = add(x = attn_93_cast_fp16, y = attention_mask_to_fp16)[name = string("input_231_cast_fp16")]; tensor attn_95_cast_fp16 = softmax(axis = var_4404, x = input_231_cast_fp16)[name = string("attn_95_cast_fp16")]; bool out_47_transpose_x_0 = const()[name = string("out_47_transpose_x_0"), val = bool(false)]; bool out_47_transpose_y_0 = const()[name = string("out_47_transpose_y_0"), val = bool(false)]; tensor out_47_cast_fp16 = matmul(transpose_x = out_47_transpose_x_0, transpose_y = out_47_transpose_y_0, x = attn_95_cast_fp16, y = v_exp_95_cast_fp16)[name = string("out_47_cast_fp16")]; tensor var_4544_perm_0 = const()[name = string("op_4544_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4545 = const()[name = string("op_4545"), val = tensor([1, 1, -1])]; tensor var_4544_cast_fp16 = transpose(perm = var_4544_perm_0, x = out_47_cast_fp16)[name = string("transpose_16")]; tensor input_233_cast_fp16 = reshape(shape = var_4545, x = var_4544_cast_fp16)[name = string("input_233_cast_fp16")]; tensor layers_23_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366185344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368282560))))[name = string("layers_23_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_164_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_o_proj_weight_to_fp16_palettized, x = input_233_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor x_617_cast_fp16 = add(x = x_597_cast_fp16, y = linear_164_cast_fp16)[name = string("x_617_cast_fp16")]; fp16 var_4403_promoted_3_to_fp16 = const()[name = string("op_4403_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4552_cast_fp16 = pow(x = x_617_cast_fp16, y = var_4403_promoted_3_to_fp16)[name = string("op_4552_cast_fp16")]; tensor var_4554_axes_0 = const()[name = string("op_4554_axes_0"), val = tensor([-1])]; bool var_4554_keep_dims_0 = const()[name = string("op_4554_keep_dims_0"), val = bool(true)]; tensor var_4554_cast_fp16 = reduce_mean(axes = var_4554_axes_0, keep_dims = var_4554_keep_dims_0, x = var_4552_cast_fp16)[name = string("op_4554_cast_fp16")]; fp16 var_4555_to_fp16 = const()[name = string("op_4555_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4556_cast_fp16 = add(x = var_4554_cast_fp16, y = var_4555_to_fp16)[name = string("op_4556_cast_fp16")]; fp32 norm_191_epsilon_0 = const()[name = string("norm_191_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_191_cast_fp16 = rsqrt(epsilon = norm_191_epsilon_0, x = var_4556_cast_fp16)[name = string("norm_191_cast_fp16")]; tensor var_4558_cast_fp16 = mul(x = x_617_cast_fp16, y = norm_191_cast_fp16)[name = string("op_4558_cast_fp16")]; tensor layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368283136)))]; tensor var_4559_cast_fp16 = mul(x = var_4558_cast_fp16, y = layers_23_post_attention_layernorm_weight_to_fp16)[name = string("op_4559_cast_fp16")]; tensor layers_23_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368285248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371431040))))[name = string("layers_23_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_gate_proj_weight_to_fp16_palettized, x = var_4559_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor var_4569_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_4569_cast_fp16")]; tensor layers_23_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371431616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374577408))))[name = string("layers_23_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_up_proj_weight_to_fp16_palettized, x = var_4559_cast_fp16)[name = string("linear_166_cast_fp16")]; tensor input_239_cast_fp16 = mul(x = var_4569_cast_fp16, y = linear_166_cast_fp16)[name = string("input_239_cast_fp16")]; tensor layers_23_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374577984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377723776))))[name = string("layers_23_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_167_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_mlp_down_proj_weight_to_fp16_palettized, x = input_239_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor x_623_cast_fp16 = add(x = x_617_cast_fp16, y = linear_167_cast_fp16)[name = string("x_623_cast_fp16")]; int32 var_4589 = const()[name = string("op_4589"), val = int32(-1)]; fp16 var_4588_promoted_to_fp16 = const()[name = string("op_4588_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4598_cast_fp16 = pow(x = x_623_cast_fp16, y = var_4588_promoted_to_fp16)[name = string("op_4598_cast_fp16")]; tensor var_4600_axes_0 = const()[name = string("op_4600_axes_0"), val = tensor([-1])]; bool var_4600_keep_dims_0 = const()[name = string("op_4600_keep_dims_0"), val = bool(true)]; tensor var_4600_cast_fp16 = reduce_mean(axes = var_4600_axes_0, keep_dims = var_4600_keep_dims_0, x = var_4598_cast_fp16)[name = string("op_4600_cast_fp16")]; fp16 var_4601_to_fp16 = const()[name = string("op_4601_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4602_cast_fp16 = add(x = var_4600_cast_fp16, y = var_4601_to_fp16)[name = string("op_4602_cast_fp16")]; fp32 norm_193_epsilon_0 = const()[name = string("norm_193_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_193_cast_fp16 = rsqrt(epsilon = norm_193_epsilon_0, x = var_4602_cast_fp16)[name = string("norm_193_cast_fp16")]; tensor var_4604_cast_fp16 = mul(x = x_623_cast_fp16, y = norm_193_cast_fp16)[name = string("op_4604_cast_fp16")]; tensor layers_24_input_layernorm_weight_to_fp16 = const()[name = string("layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377724352)))]; tensor var_4605_cast_fp16 = mul(x = var_4604_cast_fp16, y = layers_24_input_layernorm_weight_to_fp16)[name = string("op_4605_cast_fp16")]; tensor layers_24_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377726464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379823680))))[name = string("layers_24_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_24_self_attn_q_proj_weight_to_fp16_palettized, x = var_4605_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor var_4621 = const()[name = string("op_4621"), val = tensor([1, 1, 16, 128])]; tensor var_4622_cast_fp16 = reshape(shape = var_4621, x = linear_168_cast_fp16)[name = string("op_4622_cast_fp16")]; tensor x_629_perm_0 = const()[name = string("x_629_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_24_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379824256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380872896))))[name = string("layers_24_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_k_proj_weight_to_fp16_palettized, x = var_4605_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor var_4626 = const()[name = string("op_4626"), val = tensor([1, 1, 8, 128])]; tensor var_4627_cast_fp16 = reshape(shape = var_4626, x = linear_169_cast_fp16)[name = string("op_4627_cast_fp16")]; tensor x_633_perm_0 = const()[name = string("x_633_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_24_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381922112))))[name = string("layers_24_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_v_proj_weight_to_fp16_palettized, x = var_4605_cast_fp16)[name = string("linear_170_cast_fp16")]; tensor var_4631 = const()[name = string("op_4631"), val = tensor([1, 1, 8, 128])]; tensor var_4632_cast_fp16 = reshape(shape = var_4631, x = linear_170_cast_fp16)[name = string("op_4632_cast_fp16")]; tensor v_49_perm_0 = const()[name = string("v_49_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_4588_promoted_1_to_fp16 = const()[name = string("op_4588_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_629_cast_fp16 = transpose(perm = x_629_perm_0, x = var_4622_cast_fp16)[name = string("transpose_15")]; tensor var_4636_cast_fp16 = pow(x = x_629_cast_fp16, y = var_4588_promoted_1_to_fp16)[name = string("op_4636_cast_fp16")]; tensor var_4638_axes_0 = const()[name = string("op_4638_axes_0"), val = tensor([-1])]; bool var_4638_keep_dims_0 = const()[name = string("op_4638_keep_dims_0"), val = bool(true)]; tensor var_4638_cast_fp16 = reduce_mean(axes = var_4638_axes_0, keep_dims = var_4638_keep_dims_0, x = var_4636_cast_fp16)[name = string("op_4638_cast_fp16")]; fp16 var_4639_to_fp16 = const()[name = string("op_4639_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4640_cast_fp16 = add(x = var_4638_cast_fp16, y = var_4639_to_fp16)[name = string("op_4640_cast_fp16")]; fp32 norm_195_epsilon_0 = const()[name = string("norm_195_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_195_cast_fp16 = rsqrt(epsilon = norm_195_epsilon_0, x = var_4640_cast_fp16)[name = string("norm_195_cast_fp16")]; tensor var_4642_cast_fp16 = mul(x = x_629_cast_fp16, y = norm_195_cast_fp16)[name = string("op_4642_cast_fp16")]; tensor layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381922688)))]; tensor var_4643_cast_fp16 = mul(x = var_4642_cast_fp16, y = layers_24_self_attn_q_norm_weight_to_fp16)[name = string("op_4643_cast_fp16")]; fp16 var_4588_promoted_2_to_fp16 = const()[name = string("op_4588_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_633_cast_fp16 = transpose(perm = x_633_perm_0, x = var_4627_cast_fp16)[name = string("transpose_14")]; tensor var_4647_cast_fp16 = pow(x = x_633_cast_fp16, y = var_4588_promoted_2_to_fp16)[name = string("op_4647_cast_fp16")]; tensor var_4649_axes_0 = const()[name = string("op_4649_axes_0"), val = tensor([-1])]; bool var_4649_keep_dims_0 = const()[name = string("op_4649_keep_dims_0"), val = bool(true)]; tensor var_4649_cast_fp16 = reduce_mean(axes = var_4649_axes_0, keep_dims = var_4649_keep_dims_0, x = var_4647_cast_fp16)[name = string("op_4649_cast_fp16")]; fp16 var_4650_to_fp16 = const()[name = string("op_4650_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4651_cast_fp16 = add(x = var_4649_cast_fp16, y = var_4650_to_fp16)[name = string("op_4651_cast_fp16")]; fp32 norm_197_epsilon_0 = const()[name = string("norm_197_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_197_cast_fp16 = rsqrt(epsilon = norm_197_epsilon_0, x = var_4651_cast_fp16)[name = string("norm_197_cast_fp16")]; tensor var_4653_cast_fp16 = mul(x = x_633_cast_fp16, y = norm_197_cast_fp16)[name = string("op_4653_cast_fp16")]; tensor layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381923008)))]; tensor var_4654_cast_fp16 = mul(x = var_4653_cast_fp16, y = layers_24_self_attn_k_norm_weight_to_fp16)[name = string("op_4654_cast_fp16")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = var_4643_cast_fp16)[name = string("x1_97_cast_fp16")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = var_4643_cast_fp16)[name = string("x2_97_cast_fp16")]; tensor var_4672_cast_fp16 = mul(x = x1_97_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4672_cast_fp16")]; tensor var_4673_cast_fp16 = mul(x = x2_97_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4673_cast_fp16")]; tensor var_4674_cast_fp16 = sub(x = var_4672_cast_fp16, y = var_4673_cast_fp16)[name = string("op_4674_cast_fp16")]; tensor var_4675_cast_fp16 = mul(x = x2_97_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4675_cast_fp16")]; tensor var_4676_cast_fp16 = mul(x = x1_97_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4676_cast_fp16")]; tensor var_4677_cast_fp16 = add(x = var_4675_cast_fp16, y = var_4676_cast_fp16)[name = string("op_4677_cast_fp16")]; bool q_49_interleave_0 = const()[name = string("q_49_interleave_0"), val = bool(false)]; tensor q_49_cast_fp16 = concat(axis = var_4589, interleave = q_49_interleave_0, values = (var_4674_cast_fp16, var_4677_cast_fp16))[name = string("q_49_cast_fp16")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = var_4654_cast_fp16)[name = string("x1_99_cast_fp16")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = var_4654_cast_fp16)[name = string("x2_99_cast_fp16")]; tensor var_4696_cast_fp16 = mul(x = x1_99_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4696_cast_fp16")]; tensor var_4697_cast_fp16 = mul(x = x2_99_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4697_cast_fp16")]; tensor var_4698_cast_fp16 = sub(x = var_4696_cast_fp16, y = var_4697_cast_fp16)[name = string("op_4698_cast_fp16")]; tensor var_4699_cast_fp16 = mul(x = x2_99_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4699_cast_fp16")]; tensor var_4700_cast_fp16 = mul(x = x1_99_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4700_cast_fp16")]; tensor var_4701_cast_fp16 = add(x = var_4699_cast_fp16, y = var_4700_cast_fp16)[name = string("op_4701_cast_fp16")]; bool k_49_interleave_0 = const()[name = string("k_49_interleave_0"), val = bool(false)]; tensor k_49_cast_fp16 = concat(axis = var_4589, interleave = k_49_interleave_0, values = (var_4698_cast_fp16, var_4701_cast_fp16))[name = string("k_49_cast_fp16")]; tensor read_state_48 = read_state(input = k_cache_24)[name = string("read_state_48")]; tensor k_cache_147_cast_fp16 = mul(x = read_state_48, y = var_264_cast_fp16)[name = string("k_cache_147_cast_fp16")]; write_state(data = k_cache_147_cast_fp16, input = k_cache_24)[name = string("coreml_update_state_208_write_state")]; tensor coreml_update_state_208 = read_state(input = k_cache_24)[name = string("coreml_update_state_208")]; tensor var_4706_cast_fp16 = mul(x = k_49_cast_fp16, y = onehot_cast_fp16)[name = string("op_4706_cast_fp16")]; tensor k_cache_149_cast_fp16 = add(x = coreml_update_state_208, y = var_4706_cast_fp16)[name = string("k_cache_149_cast_fp16")]; write_state(data = k_cache_149_cast_fp16, input = k_cache_24)[name = string("coreml_update_state_209_write_state")]; tensor coreml_update_state_209 = read_state(input = k_cache_24)[name = string("coreml_update_state_209")]; tensor read_state_49 = read_state(input = v_cache_24)[name = string("read_state_49")]; tensor v_cache_147_cast_fp16 = mul(x = read_state_49, y = var_264_cast_fp16)[name = string("v_cache_147_cast_fp16")]; write_state(data = v_cache_147_cast_fp16, input = v_cache_24)[name = string("coreml_update_state_210_write_state")]; tensor coreml_update_state_210 = read_state(input = v_cache_24)[name = string("coreml_update_state_210")]; tensor v_49_cast_fp16 = transpose(perm = v_49_perm_0, x = var_4632_cast_fp16)[name = string("transpose_13")]; tensor var_4710_cast_fp16 = mul(x = v_49_cast_fp16, y = onehot_cast_fp16)[name = string("op_4710_cast_fp16")]; tensor v_cache_149_cast_fp16 = add(x = coreml_update_state_210, y = var_4710_cast_fp16)[name = string("v_cache_149_cast_fp16")]; write_state(data = v_cache_149_cast_fp16, input = v_cache_24)[name = string("coreml_update_state_211_write_state")]; tensor coreml_update_state_211 = read_state(input = v_cache_24)[name = string("coreml_update_state_211")]; tensor var_4712_axes_0 = const()[name = string("op_4712_axes_0"), val = tensor([2])]; tensor var_4712_cast_fp16 = expand_dims(axes = var_4712_axes_0, x = coreml_update_state_209)[name = string("op_4712_cast_fp16")]; tensor k_exp_97_reps_0 = const()[name = string("k_exp_97_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_97_cast_fp16 = tile(reps = k_exp_97_reps_0, x = var_4712_cast_fp16)[name = string("k_exp_97_cast_fp16")]; tensor var_4715 = const()[name = string("op_4715"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_99_cast_fp16 = reshape(shape = var_4715, x = k_exp_97_cast_fp16)[name = string("k_exp_99_cast_fp16")]; tensor var_4717_axes_0 = const()[name = string("op_4717_axes_0"), val = tensor([2])]; tensor var_4717_cast_fp16 = expand_dims(axes = var_4717_axes_0, x = coreml_update_state_211)[name = string("op_4717_cast_fp16")]; tensor v_exp_97_reps_0 = const()[name = string("v_exp_97_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_97_cast_fp16 = tile(reps = v_exp_97_reps_0, x = var_4717_cast_fp16)[name = string("v_exp_97_cast_fp16")]; tensor var_4720 = const()[name = string("op_4720"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_99_cast_fp16 = reshape(shape = var_4720, x = v_exp_97_cast_fp16)[name = string("v_exp_99_cast_fp16")]; bool var_4723_transpose_x_1 = const()[name = string("op_4723_transpose_x_1"), val = bool(false)]; bool var_4723_transpose_y_1 = const()[name = string("op_4723_transpose_y_1"), val = bool(true)]; tensor var_4723_cast_fp16 = matmul(transpose_x = var_4723_transpose_x_1, transpose_y = var_4723_transpose_y_1, x = q_49_cast_fp16, y = k_exp_99_cast_fp16)[name = string("op_4723_cast_fp16")]; fp16 var_4724_to_fp16 = const()[name = string("op_4724_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_97_cast_fp16 = mul(x = var_4723_cast_fp16, y = var_4724_to_fp16)[name = string("attn_97_cast_fp16")]; tensor input_241_cast_fp16 = add(x = attn_97_cast_fp16, y = attention_mask_to_fp16)[name = string("input_241_cast_fp16")]; tensor attn_99_cast_fp16 = softmax(axis = var_4589, x = input_241_cast_fp16)[name = string("attn_99_cast_fp16")]; bool out_49_transpose_x_0 = const()[name = string("out_49_transpose_x_0"), val = bool(false)]; bool out_49_transpose_y_0 = const()[name = string("out_49_transpose_y_0"), val = bool(false)]; tensor out_49_cast_fp16 = matmul(transpose_x = out_49_transpose_x_0, transpose_y = out_49_transpose_y_0, x = attn_99_cast_fp16, y = v_exp_99_cast_fp16)[name = string("out_49_cast_fp16")]; tensor var_4729_perm_0 = const()[name = string("op_4729_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4730 = const()[name = string("op_4730"), val = tensor([1, 1, -1])]; tensor var_4729_cast_fp16 = transpose(perm = var_4729_perm_0, x = out_49_cast_fp16)[name = string("transpose_12")]; tensor input_243_cast_fp16 = reshape(shape = var_4730, x = var_4729_cast_fp16)[name = string("input_243_cast_fp16")]; tensor layers_24_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381923328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384020544))))[name = string("layers_24_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_171_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_o_proj_weight_to_fp16_palettized, x = input_243_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor x_643_cast_fp16 = add(x = x_623_cast_fp16, y = linear_171_cast_fp16)[name = string("x_643_cast_fp16")]; fp16 var_4588_promoted_3_to_fp16 = const()[name = string("op_4588_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4737_cast_fp16 = pow(x = x_643_cast_fp16, y = var_4588_promoted_3_to_fp16)[name = string("op_4737_cast_fp16")]; tensor var_4739_axes_0 = const()[name = string("op_4739_axes_0"), val = tensor([-1])]; bool var_4739_keep_dims_0 = const()[name = string("op_4739_keep_dims_0"), val = bool(true)]; tensor var_4739_cast_fp16 = reduce_mean(axes = var_4739_axes_0, keep_dims = var_4739_keep_dims_0, x = var_4737_cast_fp16)[name = string("op_4739_cast_fp16")]; fp16 var_4740_to_fp16 = const()[name = string("op_4740_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4741_cast_fp16 = add(x = var_4739_cast_fp16, y = var_4740_to_fp16)[name = string("op_4741_cast_fp16")]; fp32 norm_199_epsilon_0 = const()[name = string("norm_199_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_199_cast_fp16 = rsqrt(epsilon = norm_199_epsilon_0, x = var_4741_cast_fp16)[name = string("norm_199_cast_fp16")]; tensor var_4743_cast_fp16 = mul(x = x_643_cast_fp16, y = norm_199_cast_fp16)[name = string("op_4743_cast_fp16")]; tensor layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384021120)))]; tensor var_4744_cast_fp16 = mul(x = var_4743_cast_fp16, y = layers_24_post_attention_layernorm_weight_to_fp16)[name = string("op_4744_cast_fp16")]; tensor layers_24_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384023232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387169024))))[name = string("layers_24_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_gate_proj_weight_to_fp16_palettized, x = var_4744_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor var_4754_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_4754_cast_fp16")]; tensor layers_24_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387169600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390315392))))[name = string("layers_24_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_up_proj_weight_to_fp16_palettized, x = var_4744_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor input_249_cast_fp16 = mul(x = var_4754_cast_fp16, y = linear_173_cast_fp16)[name = string("input_249_cast_fp16")]; tensor layers_24_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390315968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393461760))))[name = string("layers_24_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_174_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_mlp_down_proj_weight_to_fp16_palettized, x = input_249_cast_fp16)[name = string("linear_174_cast_fp16")]; tensor x_649_cast_fp16 = add(x = x_643_cast_fp16, y = linear_174_cast_fp16)[name = string("x_649_cast_fp16")]; int32 var_4774 = const()[name = string("op_4774"), val = int32(-1)]; fp16 var_4773_promoted_to_fp16 = const()[name = string("op_4773_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4783_cast_fp16 = pow(x = x_649_cast_fp16, y = var_4773_promoted_to_fp16)[name = string("op_4783_cast_fp16")]; tensor var_4785_axes_0 = const()[name = string("op_4785_axes_0"), val = tensor([-1])]; bool var_4785_keep_dims_0 = const()[name = string("op_4785_keep_dims_0"), val = bool(true)]; tensor var_4785_cast_fp16 = reduce_mean(axes = var_4785_axes_0, keep_dims = var_4785_keep_dims_0, x = var_4783_cast_fp16)[name = string("op_4785_cast_fp16")]; fp16 var_4786_to_fp16 = const()[name = string("op_4786_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4787_cast_fp16 = add(x = var_4785_cast_fp16, y = var_4786_to_fp16)[name = string("op_4787_cast_fp16")]; fp32 norm_201_epsilon_0 = const()[name = string("norm_201_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_201_cast_fp16 = rsqrt(epsilon = norm_201_epsilon_0, x = var_4787_cast_fp16)[name = string("norm_201_cast_fp16")]; tensor var_4789_cast_fp16 = mul(x = x_649_cast_fp16, y = norm_201_cast_fp16)[name = string("op_4789_cast_fp16")]; tensor layers_25_input_layernorm_weight_to_fp16 = const()[name = string("layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393462336)))]; tensor var_4790_cast_fp16 = mul(x = var_4789_cast_fp16, y = layers_25_input_layernorm_weight_to_fp16)[name = string("op_4790_cast_fp16")]; tensor layers_25_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393464448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395561664))))[name = string("layers_25_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_25_self_attn_q_proj_weight_to_fp16_palettized, x = var_4790_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor var_4806 = const()[name = string("op_4806"), val = tensor([1, 1, 16, 128])]; tensor var_4807_cast_fp16 = reshape(shape = var_4806, x = linear_175_cast_fp16)[name = string("op_4807_cast_fp16")]; tensor x_655_perm_0 = const()[name = string("x_655_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_25_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395562240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396610880))))[name = string("layers_25_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_k_proj_weight_to_fp16_palettized, x = var_4790_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor var_4811 = const()[name = string("op_4811"), val = tensor([1, 1, 8, 128])]; tensor var_4812_cast_fp16 = reshape(shape = var_4811, x = linear_176_cast_fp16)[name = string("op_4812_cast_fp16")]; tensor x_659_perm_0 = const()[name = string("x_659_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_25_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396611456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660096))))[name = string("layers_25_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_v_proj_weight_to_fp16_palettized, x = var_4790_cast_fp16)[name = string("linear_177_cast_fp16")]; tensor var_4816 = const()[name = string("op_4816"), val = tensor([1, 1, 8, 128])]; tensor var_4817_cast_fp16 = reshape(shape = var_4816, x = linear_177_cast_fp16)[name = string("op_4817_cast_fp16")]; tensor v_51_perm_0 = const()[name = string("v_51_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_4773_promoted_1_to_fp16 = const()[name = string("op_4773_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_655_cast_fp16 = transpose(perm = x_655_perm_0, x = var_4807_cast_fp16)[name = string("transpose_11")]; tensor var_4821_cast_fp16 = pow(x = x_655_cast_fp16, y = var_4773_promoted_1_to_fp16)[name = string("op_4821_cast_fp16")]; tensor var_4823_axes_0 = const()[name = string("op_4823_axes_0"), val = tensor([-1])]; bool var_4823_keep_dims_0 = const()[name = string("op_4823_keep_dims_0"), val = bool(true)]; tensor var_4823_cast_fp16 = reduce_mean(axes = var_4823_axes_0, keep_dims = var_4823_keep_dims_0, x = var_4821_cast_fp16)[name = string("op_4823_cast_fp16")]; fp16 var_4824_to_fp16 = const()[name = string("op_4824_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4825_cast_fp16 = add(x = var_4823_cast_fp16, y = var_4824_to_fp16)[name = string("op_4825_cast_fp16")]; fp32 norm_203_epsilon_0 = const()[name = string("norm_203_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_203_cast_fp16 = rsqrt(epsilon = norm_203_epsilon_0, x = var_4825_cast_fp16)[name = string("norm_203_cast_fp16")]; tensor var_4827_cast_fp16 = mul(x = x_655_cast_fp16, y = norm_203_cast_fp16)[name = string("op_4827_cast_fp16")]; tensor layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660672)))]; tensor var_4828_cast_fp16 = mul(x = var_4827_cast_fp16, y = layers_25_self_attn_q_norm_weight_to_fp16)[name = string("op_4828_cast_fp16")]; fp16 var_4773_promoted_2_to_fp16 = const()[name = string("op_4773_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_659_cast_fp16 = transpose(perm = x_659_perm_0, x = var_4812_cast_fp16)[name = string("transpose_10")]; tensor var_4832_cast_fp16 = pow(x = x_659_cast_fp16, y = var_4773_promoted_2_to_fp16)[name = string("op_4832_cast_fp16")]; tensor var_4834_axes_0 = const()[name = string("op_4834_axes_0"), val = tensor([-1])]; bool var_4834_keep_dims_0 = const()[name = string("op_4834_keep_dims_0"), val = bool(true)]; tensor var_4834_cast_fp16 = reduce_mean(axes = var_4834_axes_0, keep_dims = var_4834_keep_dims_0, x = var_4832_cast_fp16)[name = string("op_4834_cast_fp16")]; fp16 var_4835_to_fp16 = const()[name = string("op_4835_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4836_cast_fp16 = add(x = var_4834_cast_fp16, y = var_4835_to_fp16)[name = string("op_4836_cast_fp16")]; fp32 norm_205_epsilon_0 = const()[name = string("norm_205_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_205_cast_fp16 = rsqrt(epsilon = norm_205_epsilon_0, x = var_4836_cast_fp16)[name = string("norm_205_cast_fp16")]; tensor var_4838_cast_fp16 = mul(x = x_659_cast_fp16, y = norm_205_cast_fp16)[name = string("op_4838_cast_fp16")]; tensor layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660992)))]; tensor var_4839_cast_fp16 = mul(x = var_4838_cast_fp16, y = layers_25_self_attn_k_norm_weight_to_fp16)[name = string("op_4839_cast_fp16")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = var_4828_cast_fp16)[name = string("x1_101_cast_fp16")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = var_4828_cast_fp16)[name = string("x2_101_cast_fp16")]; tensor var_4857_cast_fp16 = mul(x = x1_101_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4857_cast_fp16")]; tensor var_4858_cast_fp16 = mul(x = x2_101_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4858_cast_fp16")]; tensor var_4859_cast_fp16 = sub(x = var_4857_cast_fp16, y = var_4858_cast_fp16)[name = string("op_4859_cast_fp16")]; tensor var_4860_cast_fp16 = mul(x = x2_101_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4860_cast_fp16")]; tensor var_4861_cast_fp16 = mul(x = x1_101_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4861_cast_fp16")]; tensor var_4862_cast_fp16 = add(x = var_4860_cast_fp16, y = var_4861_cast_fp16)[name = string("op_4862_cast_fp16")]; bool q_51_interleave_0 = const()[name = string("q_51_interleave_0"), val = bool(false)]; tensor q_51_cast_fp16 = concat(axis = var_4774, interleave = q_51_interleave_0, values = (var_4859_cast_fp16, var_4862_cast_fp16))[name = string("q_51_cast_fp16")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = var_4839_cast_fp16)[name = string("x1_103_cast_fp16")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = var_4839_cast_fp16)[name = string("x2_103_cast_fp16")]; tensor var_4881_cast_fp16 = mul(x = x1_103_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4881_cast_fp16")]; tensor var_4882_cast_fp16 = mul(x = x2_103_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4882_cast_fp16")]; tensor var_4883_cast_fp16 = sub(x = var_4881_cast_fp16, y = var_4882_cast_fp16)[name = string("op_4883_cast_fp16")]; tensor var_4884_cast_fp16 = mul(x = x2_103_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_4884_cast_fp16")]; tensor var_4885_cast_fp16 = mul(x = x1_103_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_4885_cast_fp16")]; tensor var_4886_cast_fp16 = add(x = var_4884_cast_fp16, y = var_4885_cast_fp16)[name = string("op_4886_cast_fp16")]; bool k_51_interleave_0 = const()[name = string("k_51_interleave_0"), val = bool(false)]; tensor k_51_cast_fp16 = concat(axis = var_4774, interleave = k_51_interleave_0, values = (var_4883_cast_fp16, var_4886_cast_fp16))[name = string("k_51_cast_fp16")]; tensor read_state_50 = read_state(input = k_cache_25)[name = string("read_state_50")]; tensor k_cache_153_cast_fp16 = mul(x = read_state_50, y = var_264_cast_fp16)[name = string("k_cache_153_cast_fp16")]; write_state(data = k_cache_153_cast_fp16, input = k_cache_25)[name = string("coreml_update_state_212_write_state")]; tensor coreml_update_state_212 = read_state(input = k_cache_25)[name = string("coreml_update_state_212")]; tensor var_4891_cast_fp16 = mul(x = k_51_cast_fp16, y = onehot_cast_fp16)[name = string("op_4891_cast_fp16")]; tensor k_cache_155_cast_fp16 = add(x = coreml_update_state_212, y = var_4891_cast_fp16)[name = string("k_cache_155_cast_fp16")]; write_state(data = k_cache_155_cast_fp16, input = k_cache_25)[name = string("coreml_update_state_213_write_state")]; tensor coreml_update_state_213 = read_state(input = k_cache_25)[name = string("coreml_update_state_213")]; tensor read_state_51 = read_state(input = v_cache_25)[name = string("read_state_51")]; tensor v_cache_153_cast_fp16 = mul(x = read_state_51, y = var_264_cast_fp16)[name = string("v_cache_153_cast_fp16")]; write_state(data = v_cache_153_cast_fp16, input = v_cache_25)[name = string("coreml_update_state_214_write_state")]; tensor coreml_update_state_214 = read_state(input = v_cache_25)[name = string("coreml_update_state_214")]; tensor v_51_cast_fp16 = transpose(perm = v_51_perm_0, x = var_4817_cast_fp16)[name = string("transpose_9")]; tensor var_4895_cast_fp16 = mul(x = v_51_cast_fp16, y = onehot_cast_fp16)[name = string("op_4895_cast_fp16")]; tensor v_cache_155_cast_fp16 = add(x = coreml_update_state_214, y = var_4895_cast_fp16)[name = string("v_cache_155_cast_fp16")]; write_state(data = v_cache_155_cast_fp16, input = v_cache_25)[name = string("coreml_update_state_215_write_state")]; tensor coreml_update_state_215 = read_state(input = v_cache_25)[name = string("coreml_update_state_215")]; tensor var_4897_axes_0 = const()[name = string("op_4897_axes_0"), val = tensor([2])]; tensor var_4897_cast_fp16 = expand_dims(axes = var_4897_axes_0, x = coreml_update_state_213)[name = string("op_4897_cast_fp16")]; tensor k_exp_101_reps_0 = const()[name = string("k_exp_101_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_101_cast_fp16 = tile(reps = k_exp_101_reps_0, x = var_4897_cast_fp16)[name = string("k_exp_101_cast_fp16")]; tensor var_4900 = const()[name = string("op_4900"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_103_cast_fp16 = reshape(shape = var_4900, x = k_exp_101_cast_fp16)[name = string("k_exp_103_cast_fp16")]; tensor var_4902_axes_0 = const()[name = string("op_4902_axes_0"), val = tensor([2])]; tensor var_4902_cast_fp16 = expand_dims(axes = var_4902_axes_0, x = coreml_update_state_215)[name = string("op_4902_cast_fp16")]; tensor v_exp_101_reps_0 = const()[name = string("v_exp_101_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_101_cast_fp16 = tile(reps = v_exp_101_reps_0, x = var_4902_cast_fp16)[name = string("v_exp_101_cast_fp16")]; tensor var_4905 = const()[name = string("op_4905"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_103_cast_fp16 = reshape(shape = var_4905, x = v_exp_101_cast_fp16)[name = string("v_exp_103_cast_fp16")]; bool var_4908_transpose_x_1 = const()[name = string("op_4908_transpose_x_1"), val = bool(false)]; bool var_4908_transpose_y_1 = const()[name = string("op_4908_transpose_y_1"), val = bool(true)]; tensor var_4908_cast_fp16 = matmul(transpose_x = var_4908_transpose_x_1, transpose_y = var_4908_transpose_y_1, x = q_51_cast_fp16, y = k_exp_103_cast_fp16)[name = string("op_4908_cast_fp16")]; fp16 var_4909_to_fp16 = const()[name = string("op_4909_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_101_cast_fp16 = mul(x = var_4908_cast_fp16, y = var_4909_to_fp16)[name = string("attn_101_cast_fp16")]; tensor input_251_cast_fp16 = add(x = attn_101_cast_fp16, y = attention_mask_to_fp16)[name = string("input_251_cast_fp16")]; tensor attn_103_cast_fp16 = softmax(axis = var_4774, x = input_251_cast_fp16)[name = string("attn_103_cast_fp16")]; bool out_51_transpose_x_0 = const()[name = string("out_51_transpose_x_0"), val = bool(false)]; bool out_51_transpose_y_0 = const()[name = string("out_51_transpose_y_0"), val = bool(false)]; tensor out_51_cast_fp16 = matmul(transpose_x = out_51_transpose_x_0, transpose_y = out_51_transpose_y_0, x = attn_103_cast_fp16, y = v_exp_103_cast_fp16)[name = string("out_51_cast_fp16")]; tensor var_4914_perm_0 = const()[name = string("op_4914_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4915 = const()[name = string("op_4915"), val = tensor([1, 1, -1])]; tensor var_4914_cast_fp16 = transpose(perm = var_4914_perm_0, x = out_51_cast_fp16)[name = string("transpose_8")]; tensor input_253_cast_fp16 = reshape(shape = var_4915, x = var_4914_cast_fp16)[name = string("input_253_cast_fp16")]; tensor layers_25_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397661312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399758528))))[name = string("layers_25_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_178_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_o_proj_weight_to_fp16_palettized, x = input_253_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor x_669_cast_fp16 = add(x = x_649_cast_fp16, y = linear_178_cast_fp16)[name = string("x_669_cast_fp16")]; fp16 var_4773_promoted_3_to_fp16 = const()[name = string("op_4773_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_4922_cast_fp16 = pow(x = x_669_cast_fp16, y = var_4773_promoted_3_to_fp16)[name = string("op_4922_cast_fp16")]; tensor var_4924_axes_0 = const()[name = string("op_4924_axes_0"), val = tensor([-1])]; bool var_4924_keep_dims_0 = const()[name = string("op_4924_keep_dims_0"), val = bool(true)]; tensor var_4924_cast_fp16 = reduce_mean(axes = var_4924_axes_0, keep_dims = var_4924_keep_dims_0, x = var_4922_cast_fp16)[name = string("op_4924_cast_fp16")]; fp16 var_4925_to_fp16 = const()[name = string("op_4925_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4926_cast_fp16 = add(x = var_4924_cast_fp16, y = var_4925_to_fp16)[name = string("op_4926_cast_fp16")]; fp32 norm_207_epsilon_0 = const()[name = string("norm_207_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_207_cast_fp16 = rsqrt(epsilon = norm_207_epsilon_0, x = var_4926_cast_fp16)[name = string("norm_207_cast_fp16")]; tensor var_4928_cast_fp16 = mul(x = x_669_cast_fp16, y = norm_207_cast_fp16)[name = string("op_4928_cast_fp16")]; tensor layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399759104)))]; tensor var_4929_cast_fp16 = mul(x = var_4928_cast_fp16, y = layers_25_post_attention_layernorm_weight_to_fp16)[name = string("op_4929_cast_fp16")]; tensor layers_25_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399761216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402907008))))[name = string("layers_25_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_gate_proj_weight_to_fp16_palettized, x = var_4929_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor var_4939_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_4939_cast_fp16")]; tensor layers_25_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406053376))))[name = string("layers_25_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_up_proj_weight_to_fp16_palettized, x = var_4929_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor input_259_cast_fp16 = mul(x = var_4939_cast_fp16, y = linear_180_cast_fp16)[name = string("input_259_cast_fp16")]; tensor layers_25_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406053952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409199744))))[name = string("layers_25_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_mlp_down_proj_weight_to_fp16_palettized, x = input_259_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor x_675_cast_fp16 = add(x = x_669_cast_fp16, y = linear_181_cast_fp16)[name = string("x_675_cast_fp16")]; int32 var_4959 = const()[name = string("op_4959"), val = int32(-1)]; fp16 var_4958_promoted_to_fp16 = const()[name = string("op_4958_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4968_cast_fp16 = pow(x = x_675_cast_fp16, y = var_4958_promoted_to_fp16)[name = string("op_4968_cast_fp16")]; tensor var_4970_axes_0 = const()[name = string("op_4970_axes_0"), val = tensor([-1])]; bool var_4970_keep_dims_0 = const()[name = string("op_4970_keep_dims_0"), val = bool(true)]; tensor var_4970_cast_fp16 = reduce_mean(axes = var_4970_axes_0, keep_dims = var_4970_keep_dims_0, x = var_4968_cast_fp16)[name = string("op_4970_cast_fp16")]; fp16 var_4971_to_fp16 = const()[name = string("op_4971_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4972_cast_fp16 = add(x = var_4970_cast_fp16, y = var_4971_to_fp16)[name = string("op_4972_cast_fp16")]; fp32 norm_209_epsilon_0 = const()[name = string("norm_209_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_209_cast_fp16 = rsqrt(epsilon = norm_209_epsilon_0, x = var_4972_cast_fp16)[name = string("norm_209_cast_fp16")]; tensor var_4974_cast_fp16 = mul(x = x_675_cast_fp16, y = norm_209_cast_fp16)[name = string("op_4974_cast_fp16")]; tensor layers_26_input_layernorm_weight_to_fp16 = const()[name = string("layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409200320)))]; tensor var_4975_cast_fp16 = mul(x = var_4974_cast_fp16, y = layers_26_input_layernorm_weight_to_fp16)[name = string("op_4975_cast_fp16")]; tensor layers_26_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409202432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411299648))))[name = string("layers_26_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_26_self_attn_q_proj_weight_to_fp16_palettized, x = var_4975_cast_fp16)[name = string("linear_182_cast_fp16")]; tensor var_4991 = const()[name = string("op_4991"), val = tensor([1, 1, 16, 128])]; tensor var_4992_cast_fp16 = reshape(shape = var_4991, x = linear_182_cast_fp16)[name = string("op_4992_cast_fp16")]; tensor x_681_perm_0 = const()[name = string("x_681_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_26_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411300224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412348864))))[name = string("layers_26_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_k_proj_weight_to_fp16_palettized, x = var_4975_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor var_4996 = const()[name = string("op_4996"), val = tensor([1, 1, 8, 128])]; tensor var_4997_cast_fp16 = reshape(shape = var_4996, x = linear_183_cast_fp16)[name = string("op_4997_cast_fp16")]; tensor x_685_perm_0 = const()[name = string("x_685_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_26_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412349440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398080))))[name = string("layers_26_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_v_proj_weight_to_fp16_palettized, x = var_4975_cast_fp16)[name = string("linear_184_cast_fp16")]; tensor var_5001 = const()[name = string("op_5001"), val = tensor([1, 1, 8, 128])]; tensor var_5002_cast_fp16 = reshape(shape = var_5001, x = linear_184_cast_fp16)[name = string("op_5002_cast_fp16")]; tensor v_53_perm_0 = const()[name = string("v_53_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_4958_promoted_1_to_fp16 = const()[name = string("op_4958_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_681_cast_fp16 = transpose(perm = x_681_perm_0, x = var_4992_cast_fp16)[name = string("transpose_7")]; tensor var_5006_cast_fp16 = pow(x = x_681_cast_fp16, y = var_4958_promoted_1_to_fp16)[name = string("op_5006_cast_fp16")]; tensor var_5008_axes_0 = const()[name = string("op_5008_axes_0"), val = tensor([-1])]; bool var_5008_keep_dims_0 = const()[name = string("op_5008_keep_dims_0"), val = bool(true)]; tensor var_5008_cast_fp16 = reduce_mean(axes = var_5008_axes_0, keep_dims = var_5008_keep_dims_0, x = var_5006_cast_fp16)[name = string("op_5008_cast_fp16")]; fp16 var_5009_to_fp16 = const()[name = string("op_5009_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5010_cast_fp16 = add(x = var_5008_cast_fp16, y = var_5009_to_fp16)[name = string("op_5010_cast_fp16")]; fp32 norm_211_epsilon_0 = const()[name = string("norm_211_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_211_cast_fp16 = rsqrt(epsilon = norm_211_epsilon_0, x = var_5010_cast_fp16)[name = string("norm_211_cast_fp16")]; tensor var_5012_cast_fp16 = mul(x = x_681_cast_fp16, y = norm_211_cast_fp16)[name = string("op_5012_cast_fp16")]; tensor layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398656)))]; tensor var_5013_cast_fp16 = mul(x = var_5012_cast_fp16, y = layers_26_self_attn_q_norm_weight_to_fp16)[name = string("op_5013_cast_fp16")]; fp16 var_4958_promoted_2_to_fp16 = const()[name = string("op_4958_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_685_cast_fp16 = transpose(perm = x_685_perm_0, x = var_4997_cast_fp16)[name = string("transpose_6")]; tensor var_5017_cast_fp16 = pow(x = x_685_cast_fp16, y = var_4958_promoted_2_to_fp16)[name = string("op_5017_cast_fp16")]; tensor var_5019_axes_0 = const()[name = string("op_5019_axes_0"), val = tensor([-1])]; bool var_5019_keep_dims_0 = const()[name = string("op_5019_keep_dims_0"), val = bool(true)]; tensor var_5019_cast_fp16 = reduce_mean(axes = var_5019_axes_0, keep_dims = var_5019_keep_dims_0, x = var_5017_cast_fp16)[name = string("op_5019_cast_fp16")]; fp16 var_5020_to_fp16 = const()[name = string("op_5020_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5021_cast_fp16 = add(x = var_5019_cast_fp16, y = var_5020_to_fp16)[name = string("op_5021_cast_fp16")]; fp32 norm_213_epsilon_0 = const()[name = string("norm_213_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_213_cast_fp16 = rsqrt(epsilon = norm_213_epsilon_0, x = var_5021_cast_fp16)[name = string("norm_213_cast_fp16")]; tensor var_5023_cast_fp16 = mul(x = x_685_cast_fp16, y = norm_213_cast_fp16)[name = string("op_5023_cast_fp16")]; tensor layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398976)))]; tensor var_5024_cast_fp16 = mul(x = var_5023_cast_fp16, y = layers_26_self_attn_k_norm_weight_to_fp16)[name = string("op_5024_cast_fp16")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = var_5013_cast_fp16)[name = string("x1_105_cast_fp16")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = var_5013_cast_fp16)[name = string("x2_105_cast_fp16")]; tensor var_5042_cast_fp16 = mul(x = x1_105_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5042_cast_fp16")]; tensor var_5043_cast_fp16 = mul(x = x2_105_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5043_cast_fp16")]; tensor var_5044_cast_fp16 = sub(x = var_5042_cast_fp16, y = var_5043_cast_fp16)[name = string("op_5044_cast_fp16")]; tensor var_5045_cast_fp16 = mul(x = x2_105_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5045_cast_fp16")]; tensor var_5046_cast_fp16 = mul(x = x1_105_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5046_cast_fp16")]; tensor var_5047_cast_fp16 = add(x = var_5045_cast_fp16, y = var_5046_cast_fp16)[name = string("op_5047_cast_fp16")]; bool q_53_interleave_0 = const()[name = string("q_53_interleave_0"), val = bool(false)]; tensor q_53_cast_fp16 = concat(axis = var_4959, interleave = q_53_interleave_0, values = (var_5044_cast_fp16, var_5047_cast_fp16))[name = string("q_53_cast_fp16")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = var_5024_cast_fp16)[name = string("x1_107_cast_fp16")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = var_5024_cast_fp16)[name = string("x2_107_cast_fp16")]; tensor var_5066_cast_fp16 = mul(x = x1_107_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5066_cast_fp16")]; tensor var_5067_cast_fp16 = mul(x = x2_107_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5067_cast_fp16")]; tensor var_5068_cast_fp16 = sub(x = var_5066_cast_fp16, y = var_5067_cast_fp16)[name = string("op_5068_cast_fp16")]; tensor var_5069_cast_fp16 = mul(x = x2_107_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5069_cast_fp16")]; tensor var_5070_cast_fp16 = mul(x = x1_107_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5070_cast_fp16")]; tensor var_5071_cast_fp16 = add(x = var_5069_cast_fp16, y = var_5070_cast_fp16)[name = string("op_5071_cast_fp16")]; bool k_53_interleave_0 = const()[name = string("k_53_interleave_0"), val = bool(false)]; tensor k_53_cast_fp16 = concat(axis = var_4959, interleave = k_53_interleave_0, values = (var_5068_cast_fp16, var_5071_cast_fp16))[name = string("k_53_cast_fp16")]; tensor read_state_52 = read_state(input = k_cache_26)[name = string("read_state_52")]; tensor k_cache_159_cast_fp16 = mul(x = read_state_52, y = var_264_cast_fp16)[name = string("k_cache_159_cast_fp16")]; write_state(data = k_cache_159_cast_fp16, input = k_cache_26)[name = string("coreml_update_state_216_write_state")]; tensor coreml_update_state_216 = read_state(input = k_cache_26)[name = string("coreml_update_state_216")]; tensor var_5076_cast_fp16 = mul(x = k_53_cast_fp16, y = onehot_cast_fp16)[name = string("op_5076_cast_fp16")]; tensor k_cache_161_cast_fp16 = add(x = coreml_update_state_216, y = var_5076_cast_fp16)[name = string("k_cache_161_cast_fp16")]; write_state(data = k_cache_161_cast_fp16, input = k_cache_26)[name = string("coreml_update_state_217_write_state")]; tensor coreml_update_state_217 = read_state(input = k_cache_26)[name = string("coreml_update_state_217")]; tensor read_state_53 = read_state(input = v_cache_26)[name = string("read_state_53")]; tensor v_cache_159_cast_fp16 = mul(x = read_state_53, y = var_264_cast_fp16)[name = string("v_cache_159_cast_fp16")]; write_state(data = v_cache_159_cast_fp16, input = v_cache_26)[name = string("coreml_update_state_218_write_state")]; tensor coreml_update_state_218 = read_state(input = v_cache_26)[name = string("coreml_update_state_218")]; tensor v_53_cast_fp16 = transpose(perm = v_53_perm_0, x = var_5002_cast_fp16)[name = string("transpose_5")]; tensor var_5080_cast_fp16 = mul(x = v_53_cast_fp16, y = onehot_cast_fp16)[name = string("op_5080_cast_fp16")]; tensor v_cache_161_cast_fp16 = add(x = coreml_update_state_218, y = var_5080_cast_fp16)[name = string("v_cache_161_cast_fp16")]; write_state(data = v_cache_161_cast_fp16, input = v_cache_26)[name = string("coreml_update_state_219_write_state")]; tensor coreml_update_state_219 = read_state(input = v_cache_26)[name = string("coreml_update_state_219")]; tensor var_5082_axes_0 = const()[name = string("op_5082_axes_0"), val = tensor([2])]; tensor var_5082_cast_fp16 = expand_dims(axes = var_5082_axes_0, x = coreml_update_state_217)[name = string("op_5082_cast_fp16")]; tensor k_exp_105_reps_0 = const()[name = string("k_exp_105_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_105_cast_fp16 = tile(reps = k_exp_105_reps_0, x = var_5082_cast_fp16)[name = string("k_exp_105_cast_fp16")]; tensor var_5085 = const()[name = string("op_5085"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_107_cast_fp16 = reshape(shape = var_5085, x = k_exp_105_cast_fp16)[name = string("k_exp_107_cast_fp16")]; tensor var_5087_axes_0 = const()[name = string("op_5087_axes_0"), val = tensor([2])]; tensor var_5087_cast_fp16 = expand_dims(axes = var_5087_axes_0, x = coreml_update_state_219)[name = string("op_5087_cast_fp16")]; tensor v_exp_105_reps_0 = const()[name = string("v_exp_105_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_105_cast_fp16 = tile(reps = v_exp_105_reps_0, x = var_5087_cast_fp16)[name = string("v_exp_105_cast_fp16")]; tensor var_5090 = const()[name = string("op_5090"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_107_cast_fp16 = reshape(shape = var_5090, x = v_exp_105_cast_fp16)[name = string("v_exp_107_cast_fp16")]; bool var_5093_transpose_x_1 = const()[name = string("op_5093_transpose_x_1"), val = bool(false)]; bool var_5093_transpose_y_1 = const()[name = string("op_5093_transpose_y_1"), val = bool(true)]; tensor var_5093_cast_fp16 = matmul(transpose_x = var_5093_transpose_x_1, transpose_y = var_5093_transpose_y_1, x = q_53_cast_fp16, y = k_exp_107_cast_fp16)[name = string("op_5093_cast_fp16")]; fp16 var_5094_to_fp16 = const()[name = string("op_5094_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_105_cast_fp16 = mul(x = var_5093_cast_fp16, y = var_5094_to_fp16)[name = string("attn_105_cast_fp16")]; tensor input_261_cast_fp16 = add(x = attn_105_cast_fp16, y = attention_mask_to_fp16)[name = string("input_261_cast_fp16")]; tensor attn_107_cast_fp16 = softmax(axis = var_4959, x = input_261_cast_fp16)[name = string("attn_107_cast_fp16")]; bool out_53_transpose_x_0 = const()[name = string("out_53_transpose_x_0"), val = bool(false)]; bool out_53_transpose_y_0 = const()[name = string("out_53_transpose_y_0"), val = bool(false)]; tensor out_53_cast_fp16 = matmul(transpose_x = out_53_transpose_x_0, transpose_y = out_53_transpose_y_0, x = attn_107_cast_fp16, y = v_exp_107_cast_fp16)[name = string("out_53_cast_fp16")]; tensor var_5099_perm_0 = const()[name = string("op_5099_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5100 = const()[name = string("op_5100"), val = tensor([1, 1, -1])]; tensor var_5099_cast_fp16 = transpose(perm = var_5099_perm_0, x = out_53_cast_fp16)[name = string("transpose_4")]; tensor input_263_cast_fp16 = reshape(shape = var_5100, x = var_5099_cast_fp16)[name = string("input_263_cast_fp16")]; tensor layers_26_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413399296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415496512))))[name = string("layers_26_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_o_proj_weight_to_fp16_palettized, x = input_263_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor x_695_cast_fp16 = add(x = x_675_cast_fp16, y = linear_185_cast_fp16)[name = string("x_695_cast_fp16")]; fp16 var_4958_promoted_3_to_fp16 = const()[name = string("op_4958_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_5107_cast_fp16 = pow(x = x_695_cast_fp16, y = var_4958_promoted_3_to_fp16)[name = string("op_5107_cast_fp16")]; tensor var_5109_axes_0 = const()[name = string("op_5109_axes_0"), val = tensor([-1])]; bool var_5109_keep_dims_0 = const()[name = string("op_5109_keep_dims_0"), val = bool(true)]; tensor var_5109_cast_fp16 = reduce_mean(axes = var_5109_axes_0, keep_dims = var_5109_keep_dims_0, x = var_5107_cast_fp16)[name = string("op_5109_cast_fp16")]; fp16 var_5110_to_fp16 = const()[name = string("op_5110_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5111_cast_fp16 = add(x = var_5109_cast_fp16, y = var_5110_to_fp16)[name = string("op_5111_cast_fp16")]; fp32 norm_215_epsilon_0 = const()[name = string("norm_215_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_215_cast_fp16 = rsqrt(epsilon = norm_215_epsilon_0, x = var_5111_cast_fp16)[name = string("norm_215_cast_fp16")]; tensor var_5113_cast_fp16 = mul(x = x_695_cast_fp16, y = norm_215_cast_fp16)[name = string("op_5113_cast_fp16")]; tensor layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415497088)))]; tensor var_5114_cast_fp16 = mul(x = var_5113_cast_fp16, y = layers_26_post_attention_layernorm_weight_to_fp16)[name = string("op_5114_cast_fp16")]; tensor layers_26_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415499200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418644992))))[name = string("layers_26_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_gate_proj_weight_to_fp16_palettized, x = var_5114_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_5124_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_5124_cast_fp16")]; tensor layers_26_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418645568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421791360))))[name = string("layers_26_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_up_proj_weight_to_fp16_palettized, x = var_5114_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor input_269_cast_fp16 = mul(x = var_5124_cast_fp16, y = linear_187_cast_fp16)[name = string("input_269_cast_fp16")]; tensor layers_26_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421791936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424937728))))[name = string("layers_26_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_mlp_down_proj_weight_to_fp16_palettized, x = input_269_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor x_701_cast_fp16 = add(x = x_695_cast_fp16, y = linear_188_cast_fp16)[name = string("x_701_cast_fp16")]; int32 var_5144 = const()[name = string("op_5144"), val = int32(-1)]; fp16 var_5143_promoted_to_fp16 = const()[name = string("op_5143_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5153_cast_fp16 = pow(x = x_701_cast_fp16, y = var_5143_promoted_to_fp16)[name = string("op_5153_cast_fp16")]; tensor var_5155_axes_0 = const()[name = string("op_5155_axes_0"), val = tensor([-1])]; bool var_5155_keep_dims_0 = const()[name = string("op_5155_keep_dims_0"), val = bool(true)]; tensor var_5155_cast_fp16 = reduce_mean(axes = var_5155_axes_0, keep_dims = var_5155_keep_dims_0, x = var_5153_cast_fp16)[name = string("op_5155_cast_fp16")]; fp16 var_5156_to_fp16 = const()[name = string("op_5156_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5157_cast_fp16 = add(x = var_5155_cast_fp16, y = var_5156_to_fp16)[name = string("op_5157_cast_fp16")]; fp32 norm_217_epsilon_0 = const()[name = string("norm_217_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_217_cast_fp16 = rsqrt(epsilon = norm_217_epsilon_0, x = var_5157_cast_fp16)[name = string("norm_217_cast_fp16")]; tensor var_5159_cast_fp16 = mul(x = x_701_cast_fp16, y = norm_217_cast_fp16)[name = string("op_5159_cast_fp16")]; tensor layers_27_input_layernorm_weight_to_fp16 = const()[name = string("layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424938304)))]; tensor var_5160_cast_fp16 = mul(x = var_5159_cast_fp16, y = layers_27_input_layernorm_weight_to_fp16)[name = string("op_5160_cast_fp16")]; tensor layers_27_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424940416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427037632))))[name = string("layers_27_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_27_self_attn_q_proj_weight_to_fp16_palettized, x = var_5160_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor var_5176 = const()[name = string("op_5176"), val = tensor([1, 1, 16, 128])]; tensor var_5177_cast_fp16 = reshape(shape = var_5176, x = linear_189_cast_fp16)[name = string("op_5177_cast_fp16")]; tensor x_707_perm_0 = const()[name = string("x_707_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_27_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427038208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428086848))))[name = string("layers_27_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_k_proj_weight_to_fp16_palettized, x = var_5160_cast_fp16)[name = string("linear_190_cast_fp16")]; tensor var_5181 = const()[name = string("op_5181"), val = tensor([1, 1, 8, 128])]; tensor var_5182_cast_fp16 = reshape(shape = var_5181, x = linear_190_cast_fp16)[name = string("op_5182_cast_fp16")]; tensor x_711_perm_0 = const()[name = string("x_711_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_27_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136064))))[name = string("layers_27_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_v_proj_weight_to_fp16_palettized, x = var_5160_cast_fp16)[name = string("linear_191_cast_fp16")]; tensor var_5186 = const()[name = string("op_5186"), val = tensor([1, 1, 8, 128])]; tensor var_5187_cast_fp16 = reshape(shape = var_5186, x = linear_191_cast_fp16)[name = string("op_5187_cast_fp16")]; tensor v_perm_0 = const()[name = string("v_perm_0"), val = tensor([0, 2, 1, 3])]; fp16 var_5143_promoted_1_to_fp16 = const()[name = string("op_5143_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_707_cast_fp16 = transpose(perm = x_707_perm_0, x = var_5177_cast_fp16)[name = string("transpose_3")]; tensor var_5191_cast_fp16 = pow(x = x_707_cast_fp16, y = var_5143_promoted_1_to_fp16)[name = string("op_5191_cast_fp16")]; tensor var_5193_axes_0 = const()[name = string("op_5193_axes_0"), val = tensor([-1])]; bool var_5193_keep_dims_0 = const()[name = string("op_5193_keep_dims_0"), val = bool(true)]; tensor var_5193_cast_fp16 = reduce_mean(axes = var_5193_axes_0, keep_dims = var_5193_keep_dims_0, x = var_5191_cast_fp16)[name = string("op_5193_cast_fp16")]; fp16 var_5194_to_fp16 = const()[name = string("op_5194_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5195_cast_fp16 = add(x = var_5193_cast_fp16, y = var_5194_to_fp16)[name = string("op_5195_cast_fp16")]; fp32 norm_219_epsilon_0 = const()[name = string("norm_219_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_219_cast_fp16 = rsqrt(epsilon = norm_219_epsilon_0, x = var_5195_cast_fp16)[name = string("norm_219_cast_fp16")]; tensor var_5197_cast_fp16 = mul(x = x_707_cast_fp16, y = norm_219_cast_fp16)[name = string("op_5197_cast_fp16")]; tensor layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136640)))]; tensor var_5198_cast_fp16 = mul(x = var_5197_cast_fp16, y = layers_27_self_attn_q_norm_weight_to_fp16)[name = string("op_5198_cast_fp16")]; fp16 var_5143_promoted_2_to_fp16 = const()[name = string("op_5143_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_711_cast_fp16 = transpose(perm = x_711_perm_0, x = var_5182_cast_fp16)[name = string("transpose_2")]; tensor var_5202_cast_fp16 = pow(x = x_711_cast_fp16, y = var_5143_promoted_2_to_fp16)[name = string("op_5202_cast_fp16")]; tensor var_5204_axes_0 = const()[name = string("op_5204_axes_0"), val = tensor([-1])]; bool var_5204_keep_dims_0 = const()[name = string("op_5204_keep_dims_0"), val = bool(true)]; tensor var_5204_cast_fp16 = reduce_mean(axes = var_5204_axes_0, keep_dims = var_5204_keep_dims_0, x = var_5202_cast_fp16)[name = string("op_5204_cast_fp16")]; fp16 var_5205_to_fp16 = const()[name = string("op_5205_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5206_cast_fp16 = add(x = var_5204_cast_fp16, y = var_5205_to_fp16)[name = string("op_5206_cast_fp16")]; fp32 norm_221_epsilon_0 = const()[name = string("norm_221_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_221_cast_fp16 = rsqrt(epsilon = norm_221_epsilon_0, x = var_5206_cast_fp16)[name = string("norm_221_cast_fp16")]; tensor var_5208_cast_fp16 = mul(x = x_711_cast_fp16, y = norm_221_cast_fp16)[name = string("op_5208_cast_fp16")]; tensor layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136960)))]; tensor var_5209_cast_fp16 = mul(x = var_5208_cast_fp16, y = layers_27_self_attn_k_norm_weight_to_fp16)[name = string("op_5209_cast_fp16")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 1, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = var_5198_cast_fp16)[name = string("x1_109_cast_fp16")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 1, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = var_5198_cast_fp16)[name = string("x2_109_cast_fp16")]; tensor var_5227_cast_fp16 = mul(x = x1_109_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5227_cast_fp16")]; tensor var_5228_cast_fp16 = mul(x = x2_109_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5228_cast_fp16")]; tensor var_5229_cast_fp16 = sub(x = var_5227_cast_fp16, y = var_5228_cast_fp16)[name = string("op_5229_cast_fp16")]; tensor var_5230_cast_fp16 = mul(x = x2_109_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5230_cast_fp16")]; tensor var_5231_cast_fp16 = mul(x = x1_109_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5231_cast_fp16")]; tensor var_5232_cast_fp16 = add(x = var_5230_cast_fp16, y = var_5231_cast_fp16)[name = string("op_5232_cast_fp16")]; bool q_interleave_0 = const()[name = string("q_interleave_0"), val = bool(false)]; tensor q_cast_fp16 = concat(axis = var_5144, interleave = q_interleave_0, values = (var_5229_cast_fp16, var_5232_cast_fp16))[name = string("q_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 1, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_5209_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 1, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_5209_cast_fp16)[name = string("x2_cast_fp16")]; tensor var_5251_cast_fp16 = mul(x = x1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5251_cast_fp16")]; tensor var_5252_cast_fp16 = mul(x = x2_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5252_cast_fp16")]; tensor var_5253_cast_fp16 = sub(x = var_5251_cast_fp16, y = var_5252_cast_fp16)[name = string("op_5253_cast_fp16")]; tensor var_5254_cast_fp16 = mul(x = x2_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_5254_cast_fp16")]; tensor var_5255_cast_fp16 = mul(x = x1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_5255_cast_fp16")]; tensor var_5256_cast_fp16 = add(x = var_5254_cast_fp16, y = var_5255_cast_fp16)[name = string("op_5256_cast_fp16")]; bool k_interleave_0 = const()[name = string("k_interleave_0"), val = bool(false)]; tensor k_cast_fp16 = concat(axis = var_5144, interleave = k_interleave_0, values = (var_5253_cast_fp16, var_5256_cast_fp16))[name = string("k_cast_fp16")]; tensor read_state_54 = read_state(input = k_cache_27)[name = string("read_state_54")]; tensor k_cache_165_cast_fp16 = mul(x = read_state_54, y = var_264_cast_fp16)[name = string("k_cache_165_cast_fp16")]; write_state(data = k_cache_165_cast_fp16, input = k_cache_27)[name = string("coreml_update_state_220_write_state")]; tensor coreml_update_state_220 = read_state(input = k_cache_27)[name = string("coreml_update_state_220")]; tensor var_5261_cast_fp16 = mul(x = k_cast_fp16, y = onehot_cast_fp16)[name = string("op_5261_cast_fp16")]; tensor k_cache_cast_fp16 = add(x = coreml_update_state_220, y = var_5261_cast_fp16)[name = string("k_cache_cast_fp16")]; write_state(data = k_cache_cast_fp16, input = k_cache_27)[name = string("coreml_update_state_221_write_state")]; tensor coreml_update_state_221 = read_state(input = k_cache_27)[name = string("coreml_update_state_221")]; tensor read_state_55 = read_state(input = v_cache_27)[name = string("read_state_55")]; tensor v_cache_165_cast_fp16 = mul(x = read_state_55, y = var_264_cast_fp16)[name = string("v_cache_165_cast_fp16")]; write_state(data = v_cache_165_cast_fp16, input = v_cache_27)[name = string("coreml_update_state_222_write_state")]; tensor coreml_update_state_222 = read_state(input = v_cache_27)[name = string("coreml_update_state_222")]; tensor v_cast_fp16 = transpose(perm = v_perm_0, x = var_5187_cast_fp16)[name = string("transpose_1")]; tensor var_5265_cast_fp16 = mul(x = v_cast_fp16, y = onehot_cast_fp16)[name = string("op_5265_cast_fp16")]; tensor v_cache_cast_fp16 = add(x = coreml_update_state_222, y = var_5265_cast_fp16)[name = string("v_cache_cast_fp16")]; write_state(data = v_cache_cast_fp16, input = v_cache_27)[name = string("coreml_update_state_223_write_state")]; tensor coreml_update_state_223 = read_state(input = v_cache_27)[name = string("coreml_update_state_223")]; tensor var_5267_axes_0 = const()[name = string("op_5267_axes_0"), val = tensor([2])]; tensor var_5267_cast_fp16 = expand_dims(axes = var_5267_axes_0, x = coreml_update_state_221)[name = string("op_5267_cast_fp16")]; tensor k_exp_109_reps_0 = const()[name = string("k_exp_109_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_109_cast_fp16 = tile(reps = k_exp_109_reps_0, x = var_5267_cast_fp16)[name = string("k_exp_109_cast_fp16")]; tensor var_5270 = const()[name = string("op_5270"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_cast_fp16 = reshape(shape = var_5270, x = k_exp_109_cast_fp16)[name = string("k_exp_cast_fp16")]; tensor var_5272_axes_0 = const()[name = string("op_5272_axes_0"), val = tensor([2])]; tensor var_5272_cast_fp16 = expand_dims(axes = var_5272_axes_0, x = coreml_update_state_223)[name = string("op_5272_cast_fp16")]; tensor v_exp_109_reps_0 = const()[name = string("v_exp_109_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_109_cast_fp16 = tile(reps = v_exp_109_reps_0, x = var_5272_cast_fp16)[name = string("v_exp_109_cast_fp16")]; tensor var_5275 = const()[name = string("op_5275"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_cast_fp16 = reshape(shape = var_5275, x = v_exp_109_cast_fp16)[name = string("v_exp_cast_fp16")]; bool var_5278_transpose_x_1 = const()[name = string("op_5278_transpose_x_1"), val = bool(false)]; bool var_5278_transpose_y_1 = const()[name = string("op_5278_transpose_y_1"), val = bool(true)]; tensor var_5278_cast_fp16 = matmul(transpose_x = var_5278_transpose_x_1, transpose_y = var_5278_transpose_y_1, x = q_cast_fp16, y = k_exp_cast_fp16)[name = string("op_5278_cast_fp16")]; fp16 var_5279_to_fp16 = const()[name = string("op_5279_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_109_cast_fp16 = mul(x = var_5278_cast_fp16, y = var_5279_to_fp16)[name = string("attn_109_cast_fp16")]; tensor input_271_cast_fp16 = add(x = attn_109_cast_fp16, y = attention_mask_to_fp16)[name = string("input_271_cast_fp16")]; tensor attn_cast_fp16 = softmax(axis = var_5144, x = input_271_cast_fp16)[name = string("attn_cast_fp16")]; bool out_transpose_x_0 = const()[name = string("out_transpose_x_0"), val = bool(false)]; bool out_transpose_y_0 = const()[name = string("out_transpose_y_0"), val = bool(false)]; tensor out_cast_fp16 = matmul(transpose_x = out_transpose_x_0, transpose_y = out_transpose_y_0, x = attn_cast_fp16, y = v_exp_cast_fp16)[name = string("out_cast_fp16")]; tensor var_5284_perm_0 = const()[name = string("op_5284_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5285 = const()[name = string("op_5285"), val = tensor([1, 1, -1])]; tensor var_5284_cast_fp16 = transpose(perm = var_5284_perm_0, x = out_cast_fp16)[name = string("transpose_0")]; tensor input_273_cast_fp16 = reshape(shape = var_5285, x = var_5284_cast_fp16)[name = string("input_273_cast_fp16")]; tensor layers_27_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429137280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431234496))))[name = string("layers_27_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_192_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_o_proj_weight_to_fp16_palettized, x = input_273_cast_fp16)[name = string("linear_192_cast_fp16")]; tensor x_721_cast_fp16 = add(x = x_701_cast_fp16, y = linear_192_cast_fp16)[name = string("x_721_cast_fp16")]; fp16 var_5143_promoted_3_to_fp16 = const()[name = string("op_5143_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_5292_cast_fp16 = pow(x = x_721_cast_fp16, y = var_5143_promoted_3_to_fp16)[name = string("op_5292_cast_fp16")]; tensor var_5294_axes_0 = const()[name = string("op_5294_axes_0"), val = tensor([-1])]; bool var_5294_keep_dims_0 = const()[name = string("op_5294_keep_dims_0"), val = bool(true)]; tensor var_5294_cast_fp16 = reduce_mean(axes = var_5294_axes_0, keep_dims = var_5294_keep_dims_0, x = var_5292_cast_fp16)[name = string("op_5294_cast_fp16")]; fp16 var_5295_to_fp16 = const()[name = string("op_5295_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5296_cast_fp16 = add(x = var_5294_cast_fp16, y = var_5295_to_fp16)[name = string("op_5296_cast_fp16")]; fp32 norm_223_epsilon_0 = const()[name = string("norm_223_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_223_cast_fp16 = rsqrt(epsilon = norm_223_epsilon_0, x = var_5296_cast_fp16)[name = string("norm_223_cast_fp16")]; tensor var_5298_cast_fp16 = mul(x = x_721_cast_fp16, y = norm_223_cast_fp16)[name = string("op_5298_cast_fp16")]; tensor layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431235072)))]; tensor var_5299_cast_fp16 = mul(x = var_5298_cast_fp16, y = layers_27_post_attention_layernorm_weight_to_fp16)[name = string("op_5299_cast_fp16")]; tensor layers_27_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431237184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434382976))))[name = string("layers_27_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_gate_proj_weight_to_fp16_palettized, x = var_5299_cast_fp16)[name = string("linear_193_cast_fp16")]; tensor var_5309_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_5309_cast_fp16")]; tensor layers_27_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434383552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437529344))))[name = string("layers_27_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_up_proj_weight_to_fp16_palettized, x = var_5299_cast_fp16)[name = string("linear_194_cast_fp16")]; tensor input_279_cast_fp16 = mul(x = var_5309_cast_fp16, y = linear_194_cast_fp16)[name = string("input_279_cast_fp16")]; tensor layers_27_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437529920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440675712))))[name = string("layers_27_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_195_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_mlp_down_proj_weight_to_fp16_palettized, x = input_279_cast_fp16)[name = string("linear_195_cast_fp16")]; tensor x_727_cast_fp16 = add(x = x_721_cast_fp16, y = linear_195_cast_fp16)[name = string("x_727_cast_fp16")]; fp16 var_5319_promoted_to_fp16 = const()[name = string("op_5319_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5325_cast_fp16 = pow(x = x_727_cast_fp16, y = var_5319_promoted_to_fp16)[name = string("op_5325_cast_fp16")]; tensor var_5327_axes_0 = const()[name = string("op_5327_axes_0"), val = tensor([-1])]; bool var_5327_keep_dims_0 = const()[name = string("op_5327_keep_dims_0"), val = bool(true)]; tensor var_5327_cast_fp16 = reduce_mean(axes = var_5327_axes_0, keep_dims = var_5327_keep_dims_0, x = var_5325_cast_fp16)[name = string("op_5327_cast_fp16")]; fp16 var_5328_to_fp16 = const()[name = string("op_5328_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5329_cast_fp16 = add(x = var_5327_cast_fp16, y = var_5328_to_fp16)[name = string("op_5329_cast_fp16")]; fp32 norm_225_epsilon_0 = const()[name = string("norm_225_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_225_cast_fp16 = rsqrt(epsilon = norm_225_epsilon_0, x = var_5329_cast_fp16)[name = string("norm_225_cast_fp16")]; tensor var_5331_cast_fp16 = mul(x = x_727_cast_fp16, y = norm_225_cast_fp16)[name = string("op_5331_cast_fp16")]; tensor norm_weight_to_fp16 = const()[name = string("norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440676288)))]; tensor var_5332_cast_fp16 = mul(x = var_5331_cast_fp16, y = norm_weight_to_fp16)[name = string("op_5332_cast_fp16")]; tensor lm_head_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440678400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596260928))))[name = string("lm_head_weight_to_fp16_palettized")]; tensor linear_196_bias_0_to_fp16 = const()[name = string("linear_196_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596261504)))]; tensor logits = linear(bias = linear_196_bias_0_to_fp16, weight = lm_head_weight_to_fp16_palettized, x = var_5332_cast_fp16)[name = string("linear_196_cast_fp16")]; } -> (logits); }