program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] { func main(tensor attention_mask, tensor input_embeds, state> k_cache_0, state> k_cache_1, state> k_cache_10, state> k_cache_11, state> k_cache_12, state> k_cache_13, state> k_cache_2, state> k_cache_3, state> k_cache_4, state> k_cache_5, state> k_cache_6, state> k_cache_7, state> k_cache_8, state> k_cache_9, tensor positions, state> v_cache_0, state> v_cache_1, state> v_cache_10, state> v_cache_11, state> v_cache_12, state> v_cache_13, state> v_cache_2, state> v_cache_3, state> v_cache_4, state> v_cache_5, state> v_cache_6, state> v_cache_7, state> v_cache_8, state> v_cache_9) { int32 var_68_one_hot_vector_size_0 = const()[name = string("op_68_one_hot_vector_size_0"), val = int32(1024)]; int32 var_68_axis_0 = const()[name = string("op_68_axis_0"), val = int32(-1)]; int32 var_68_on_value_0 = const()[name = string("op_68_on_value_0"), val = int32(1)]; int32 var_68_off_value_0 = const()[name = string("op_68_off_value_0"), val = int32(0)]; tensor var_68 = one_hot(axis = var_68_axis_0, indices = positions, off_value = var_68_off_value_0, on_value = var_68_on_value_0, one_hot_vector_size = var_68_one_hot_vector_size_0)[name = string("op_68")]; tensor var_78_axes_0 = const()[name = string("op_78_axes_0"), val = tensor([0])]; bool var_78_keep_dims_0 = const()[name = string("op_78_keep_dims_0"), val = bool(false)]; string cast_1_to_fp16_dtype_0 = const()[name = string("cast_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_68_to_fp16 = cast(dtype = cast_1_to_fp16_dtype_0, x = var_68)[name = string("cast_3")]; tensor var_78_cast_fp16 = reduce_sum(axes = var_78_axes_0, keep_dims = var_78_keep_dims_0, x = var_68_to_fp16)[name = string("op_78_cast_fp16")]; tensor var_83 = const()[name = string("op_83"), val = tensor([1, 1, 1024, 1])]; tensor var_84_cast_fp16 = reshape(shape = var_83, x = var_78_cast_fp16)[name = string("op_84_cast_fp16")]; int32 var_99 = const()[name = string("op_99"), val = int32(-1)]; string input_embeds_to_fp16_dtype_0 = const()[name = string("input_embeds_to_fp16_dtype_0"), val = string("fp16")]; fp16 var_98_promoted_to_fp16 = const()[name = string("op_98_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor input_embeds_to_fp16 = cast(dtype = input_embeds_to_fp16_dtype_0, x = input_embeds)[name = string("cast_2")]; tensor var_108_cast_fp16 = pow(x = input_embeds_to_fp16, y = var_98_promoted_to_fp16)[name = string("op_108_cast_fp16")]; tensor var_110_axes_0 = const()[name = string("op_110_axes_0"), val = tensor([-1])]; bool var_110_keep_dims_0 = const()[name = string("op_110_keep_dims_0"), val = bool(true)]; tensor var_110_cast_fp16 = reduce_mean(axes = var_110_axes_0, keep_dims = var_110_keep_dims_0, x = var_108_cast_fp16)[name = string("op_110_cast_fp16")]; fp16 var_111_to_fp16 = const()[name = string("op_111_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_112_cast_fp16 = add(x = var_110_cast_fp16, y = var_111_to_fp16)[name = string("op_112_cast_fp16")]; fp32 norm_1_epsilon_0 = const()[name = string("norm_1_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_1_cast_fp16 = rsqrt(epsilon = norm_1_epsilon_0, x = var_112_cast_fp16)[name = string("norm_1_cast_fp16")]; tensor var_114_cast_fp16 = mul(x = input_embeds_to_fp16, y = norm_1_cast_fp16)[name = string("op_114_cast_fp16")]; tensor layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_115_cast_fp16 = mul(x = var_114_cast_fp16, y = layers_0_input_layernorm_weight_to_fp16)[name = string("op_115_cast_fp16")]; tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099392))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099968)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = var_115_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_131 = const()[name = string("op_131"), val = tensor([1, 128, 16, 128])]; tensor var_132_cast_fp16 = reshape(shape = var_131, x = linear_0_cast_fp16)[name = string("op_132_cast_fp16")]; tensor x_5_perm_0 = const()[name = string("x_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2104128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3152768))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3153344)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = var_115_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_136 = const()[name = string("op_136"), val = tensor([1, 128, 8, 128])]; tensor var_137_cast_fp16 = reshape(shape = var_136, x = linear_1_cast_fp16)[name = string("op_137_cast_fp16")]; tensor x_9_perm_0 = const()[name = string("x_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3155456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204096))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = var_115_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_141 = const()[name = string("op_141"), val = tensor([1, 128, 8, 128])]; tensor var_142_cast_fp16 = reshape(shape = var_141, x = linear_2_cast_fp16)[name = string("op_142_cast_fp16")]; tensor transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_98_promoted_1_to_fp16 = const()[name = string("op_98_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_5_cast_fp16 = transpose(perm = x_5_perm_0, x = var_132_cast_fp16)[name = string("transpose_97")]; tensor var_146_cast_fp16 = pow(x = x_5_cast_fp16, y = var_98_promoted_1_to_fp16)[name = string("op_146_cast_fp16")]; tensor var_148_axes_0 = const()[name = string("op_148_axes_0"), val = tensor([-1])]; bool var_148_keep_dims_0 = const()[name = string("op_148_keep_dims_0"), val = bool(true)]; tensor var_148_cast_fp16 = reduce_mean(axes = var_148_axes_0, keep_dims = var_148_keep_dims_0, x = var_146_cast_fp16)[name = string("op_148_cast_fp16")]; fp16 var_149_to_fp16 = const()[name = string("op_149_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_150_cast_fp16 = add(x = var_148_cast_fp16, y = var_149_to_fp16)[name = string("op_150_cast_fp16")]; fp32 norm_3_epsilon_0 = const()[name = string("norm_3_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_3_cast_fp16 = rsqrt(epsilon = norm_3_epsilon_0, x = var_150_cast_fp16)[name = string("norm_3_cast_fp16")]; tensor var_152_cast_fp16 = mul(x = x_5_cast_fp16, y = norm_3_cast_fp16)[name = string("op_152_cast_fp16")]; tensor layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204672)))]; tensor var_153_cast_fp16 = mul(x = var_152_cast_fp16, y = layers_0_self_attn_q_norm_weight_to_fp16)[name = string("op_153_cast_fp16")]; fp16 var_98_promoted_2_to_fp16 = const()[name = string("op_98_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_9_cast_fp16 = transpose(perm = x_9_perm_0, x = var_137_cast_fp16)[name = string("transpose_96")]; tensor var_157_cast_fp16 = pow(x = x_9_cast_fp16, y = var_98_promoted_2_to_fp16)[name = string("op_157_cast_fp16")]; tensor var_159_axes_0 = const()[name = string("op_159_axes_0"), val = tensor([-1])]; bool var_159_keep_dims_0 = const()[name = string("op_159_keep_dims_0"), val = bool(true)]; tensor var_159_cast_fp16 = reduce_mean(axes = var_159_axes_0, keep_dims = var_159_keep_dims_0, x = var_157_cast_fp16)[name = string("op_159_cast_fp16")]; fp16 var_160_to_fp16 = const()[name = string("op_160_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_161_cast_fp16 = add(x = var_159_cast_fp16, y = var_160_to_fp16)[name = string("op_161_cast_fp16")]; fp32 norm_5_epsilon_0 = const()[name = string("norm_5_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_5_cast_fp16 = rsqrt(epsilon = norm_5_epsilon_0, x = var_161_cast_fp16)[name = string("norm_5_cast_fp16")]; tensor var_163_cast_fp16 = mul(x = x_9_cast_fp16, y = norm_5_cast_fp16)[name = string("op_163_cast_fp16")]; tensor layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204992)))]; tensor var_164_cast_fp16 = mul(x = var_163_cast_fp16, y = layers_0_self_attn_k_norm_weight_to_fp16)[name = string("op_164_cast_fp16")]; tensor var_168_axes_0 = const()[name = string("op_168_axes_0"), val = tensor([-1])]; string cast_12_to_fp16_dtype_0 = const()[name = string("cast_12_to_fp16_dtype_0"), val = string("fp16")]; tensor positions_to_fp16 = cast(dtype = cast_12_to_fp16_dtype_0, x = positions)[name = string("cast_1")]; tensor var_168_cast_fp16 = expand_dims(axes = var_168_axes_0, x = positions_to_fp16)[name = string("op_168_cast_fp16")]; tensor layers_0_self_attn_rope_inv_freq_to_fp16 = const()[name = string("layers_0_self_attn_rope_inv_freq_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205312)))]; tensor freqs_1_cast_fp16 = mul(x = var_168_cast_fp16, y = layers_0_self_attn_rope_inv_freq_to_fp16)[name = string("freqs_1_cast_fp16")]; tensor var_170_cast_fp16 = cos(x = freqs_1_cast_fp16)[name = string("op_170_cast_fp16")]; tensor var_172 = const()[name = string("op_172"), val = tensor([1, 1, -1, 64])]; tensor cos_val_1_cast_fp16 = reshape(shape = var_172, x = var_170_cast_fp16)[name = string("cos_val_1_cast_fp16")]; tensor var_174_cast_fp16 = sin(x = freqs_1_cast_fp16)[name = string("op_174_cast_fp16")]; tensor var_176 = const()[name = string("op_176"), val = tensor([1, 1, -1, 64])]; tensor sin_val_1_cast_fp16 = reshape(shape = var_176, x = var_174_cast_fp16)[name = string("sin_val_1_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_153_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_153_cast_fp16)[name = string("x2_1_cast_fp16")]; tensor var_185_cast_fp16 = mul(x = x1_1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_185_cast_fp16")]; tensor var_186_cast_fp16 = mul(x = x2_1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_186_cast_fp16")]; tensor var_187_cast_fp16 = sub(x = var_185_cast_fp16, y = var_186_cast_fp16)[name = string("op_187_cast_fp16")]; tensor var_188_cast_fp16 = mul(x = x2_1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_188_cast_fp16")]; tensor var_189_cast_fp16 = mul(x = x1_1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_189_cast_fp16")]; tensor var_190_cast_fp16 = add(x = var_188_cast_fp16, y = var_189_cast_fp16)[name = string("op_190_cast_fp16")]; bool q_1_interleave_0 = const()[name = string("q_1_interleave_0"), val = bool(false)]; tensor q_1_cast_fp16 = concat(axis = var_99, interleave = q_1_interleave_0, values = (var_187_cast_fp16, var_190_cast_fp16))[name = string("q_1_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_164_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_164_cast_fp16)[name = string("x2_3_cast_fp16")]; tensor var_212_cast_fp16 = mul(x = x1_3_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_212_cast_fp16")]; tensor var_213_cast_fp16 = mul(x = x2_3_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_213_cast_fp16")]; tensor var_214_cast_fp16 = sub(x = var_212_cast_fp16, y = var_213_cast_fp16)[name = string("op_214_cast_fp16")]; tensor var_215_cast_fp16 = mul(x = x2_3_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_215_cast_fp16")]; tensor var_216_cast_fp16 = mul(x = x1_3_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_216_cast_fp16")]; tensor var_217_cast_fp16 = add(x = var_215_cast_fp16, y = var_216_cast_fp16)[name = string("op_217_cast_fp16")]; bool var_219_interleave_0 = const()[name = string("op_219_interleave_0"), val = bool(false)]; tensor var_219_cast_fp16 = concat(axis = var_99, interleave = var_219_interleave_0, values = (var_214_cast_fp16, var_217_cast_fp16))[name = string("op_219_cast_fp16")]; tensor transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_4 = const()[name = string("concat_4"), val = tensor([128, 1024])]; tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = var_219_cast_fp16)[name = string("transpose_95")]; tensor reshape_1_cast_fp16 = reshape(shape = concat_4, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")]; bool matmul_0_transpose_x_1 = const()[name = string("matmul_0_transpose_x_1"), val = bool(true)]; bool matmul_0_transpose_y_1 = const()[name = string("matmul_0_transpose_y_1"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_1, transpose_y = matmul_0_transpose_y_1, x = var_68_to_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor concat_7 = const()[name = string("concat_7"), val = tensor([1024, 1, 8, 128])]; tensor reshape_2_cast_fp16 = reshape(shape = concat_7, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")]; tensor scattered_k_1_perm_0 = const()[name = string("scattered_k_1_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_12 = const()[name = string("concat_12"), val = tensor([128, 1024])]; tensor transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = var_142_cast_fp16)[name = string("transpose_94")]; tensor reshape_4_cast_fp16 = reshape(shape = concat_12, x = transpose_56_cast_fp16)[name = string("reshape_4_cast_fp16")]; bool matmul_1_transpose_x_1 = const()[name = string("matmul_1_transpose_x_1"), val = bool(true)]; bool matmul_1_transpose_y_1 = const()[name = string("matmul_1_transpose_y_1"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = var_68_to_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor concat_15 = const()[name = string("concat_15"), val = tensor([1024, 1, 8, 128])]; tensor reshape_5_cast_fp16 = reshape(shape = concat_15, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")]; tensor scattered_v_1_perm_0 = const()[name = string("scattered_v_1_perm_0"), val = tensor([1, 2, 0, 3])]; fp16 var_101_promoted_to_fp16 = const()[name = string("op_101_promoted_to_fp16"), val = fp16(0x1p+0)]; tensor var_224_cast_fp16 = sub(x = var_101_promoted_to_fp16, y = var_84_cast_fp16)[name = string("op_224_cast_fp16")]; tensor read_state_0 = read_state(input = k_cache_0)[name = string("read_state_0")]; tensor k_cache_3_cast_fp16 = mul(x = read_state_0, y = var_224_cast_fp16)[name = string("k_cache_3_cast_fp16")]; write_state(data = k_cache_3_cast_fp16, input = k_cache_0)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = k_cache_0)[name = string("coreml_update_state_56")]; tensor scattered_k_1_cast_fp16 = transpose(perm = scattered_k_1_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_93")]; tensor k_cache_5_cast_fp16 = add(x = coreml_update_state_56, y = scattered_k_1_cast_fp16)[name = string("k_cache_5_cast_fp16")]; write_state(data = k_cache_5_cast_fp16, input = k_cache_0)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = k_cache_0)[name = string("coreml_update_state_57")]; tensor read_state_1 = read_state(input = v_cache_0)[name = string("read_state_1")]; tensor v_cache_3_cast_fp16 = mul(x = read_state_1, y = var_224_cast_fp16)[name = string("v_cache_3_cast_fp16")]; write_state(data = v_cache_3_cast_fp16, input = v_cache_0)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = v_cache_0)[name = string("coreml_update_state_58")]; tensor scattered_v_1_cast_fp16 = transpose(perm = scattered_v_1_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_92")]; tensor v_cache_5_cast_fp16 = add(x = coreml_update_state_58, y = scattered_v_1_cast_fp16)[name = string("v_cache_5_cast_fp16")]; write_state(data = v_cache_5_cast_fp16, input = v_cache_0)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = v_cache_0)[name = string("coreml_update_state_59")]; tensor var_230_axes_0 = const()[name = string("op_230_axes_0"), val = tensor([2])]; tensor var_230_cast_fp16 = expand_dims(axes = var_230_axes_0, x = coreml_update_state_57)[name = string("op_230_cast_fp16")]; tensor k_exp_1_reps_0 = const()[name = string("k_exp_1_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_1_cast_fp16 = tile(reps = k_exp_1_reps_0, x = var_230_cast_fp16)[name = string("k_exp_1_cast_fp16")]; tensor var_233 = const()[name = string("op_233"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_3_cast_fp16 = reshape(shape = var_233, x = k_exp_1_cast_fp16)[name = string("k_exp_3_cast_fp16")]; tensor var_235_axes_0 = const()[name = string("op_235_axes_0"), val = tensor([2])]; tensor var_235_cast_fp16 = expand_dims(axes = var_235_axes_0, x = coreml_update_state_59)[name = string("op_235_cast_fp16")]; tensor v_exp_1_reps_0 = const()[name = string("v_exp_1_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_1_cast_fp16 = tile(reps = v_exp_1_reps_0, x = var_235_cast_fp16)[name = string("v_exp_1_cast_fp16")]; tensor var_238 = const()[name = string("op_238"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_3_cast_fp16 = reshape(shape = var_238, x = v_exp_1_cast_fp16)[name = string("v_exp_3_cast_fp16")]; bool var_241_transpose_x_1 = const()[name = string("op_241_transpose_x_1"), val = bool(false)]; bool var_241_transpose_y_1 = const()[name = string("op_241_transpose_y_1"), val = bool(true)]; tensor var_241_cast_fp16 = matmul(transpose_x = var_241_transpose_x_1, transpose_y = var_241_transpose_y_1, x = q_1_cast_fp16, y = k_exp_3_cast_fp16)[name = string("op_241_cast_fp16")]; fp16 var_242_to_fp16 = const()[name = string("op_242_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_1_cast_fp16 = mul(x = var_241_cast_fp16, y = var_242_to_fp16)[name = string("attn_1_cast_fp16")]; string attention_mask_to_fp16_dtype_0 = const()[name = string("attention_mask_to_fp16_dtype_0"), val = string("fp16")]; tensor attention_mask_to_fp16 = cast(dtype = attention_mask_to_fp16_dtype_0, x = attention_mask)[name = string("cast_0")]; tensor input_1_cast_fp16 = add(x = attn_1_cast_fp16, y = attention_mask_to_fp16)[name = string("input_1_cast_fp16")]; tensor attn_3_cast_fp16 = softmax(axis = var_99, x = input_1_cast_fp16)[name = string("attn_3_cast_fp16")]; bool out_1_transpose_x_0 = const()[name = string("out_1_transpose_x_0"), val = bool(false)]; bool out_1_transpose_y_0 = const()[name = string("out_1_transpose_y_0"), val = bool(false)]; tensor out_1_cast_fp16 = matmul(transpose_x = out_1_transpose_x_0, transpose_y = out_1_transpose_y_0, x = attn_3_cast_fp16, y = v_exp_3_cast_fp16)[name = string("out_1_cast_fp16")]; tensor var_247_perm_0 = const()[name = string("op_247_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_248 = const()[name = string("op_248"), val = tensor([1, 128, -1])]; tensor var_247_cast_fp16 = transpose(perm = var_247_perm_0, x = out_1_cast_fp16)[name = string("transpose_91")]; tensor input_3_cast_fp16 = reshape(shape = var_248, x = var_247_cast_fp16)[name = string("input_3_cast_fp16")]; tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6302720))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor x_19_cast_fp16 = add(x = input_embeds_to_fp16, y = linear_3_cast_fp16)[name = string("x_19_cast_fp16")]; fp16 var_98_promoted_3_to_fp16 = const()[name = string("op_98_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_255_cast_fp16 = pow(x = x_19_cast_fp16, y = var_98_promoted_3_to_fp16)[name = string("op_255_cast_fp16")]; tensor var_257_axes_0 = const()[name = string("op_257_axes_0"), val = tensor([-1])]; bool var_257_keep_dims_0 = const()[name = string("op_257_keep_dims_0"), val = bool(true)]; tensor var_257_cast_fp16 = reduce_mean(axes = var_257_axes_0, keep_dims = var_257_keep_dims_0, x = var_255_cast_fp16)[name = string("op_257_cast_fp16")]; fp16 var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_259_cast_fp16 = add(x = var_257_cast_fp16, y = var_258_to_fp16)[name = string("op_259_cast_fp16")]; fp32 norm_7_epsilon_0 = const()[name = string("norm_7_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_7_cast_fp16 = rsqrt(epsilon = norm_7_epsilon_0, x = var_259_cast_fp16)[name = string("norm_7_cast_fp16")]; tensor var_261_cast_fp16 = mul(x = x_19_cast_fp16, y = norm_7_cast_fp16)[name = string("op_261_cast_fp16")]; tensor layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6303296)))]; tensor var_262_cast_fp16 = mul(x = var_261_cast_fp16, y = layers_0_post_attention_layernorm_weight_to_fp16)[name = string("op_262_cast_fp16")]; tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6305408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451200))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451776)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = var_262_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_272_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_272_cast_fp16")]; tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9457984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12603776))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = var_262_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_9_cast_fp16 = mul(x = var_272_cast_fp16, y = linear_5_cast_fp16)[name = string("input_9_cast_fp16")]; tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12604352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750144))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_6_cast_fp16)[name = string("x_25_cast_fp16")]; int32 var_293 = const()[name = string("op_293"), val = int32(-1)]; fp16 var_292_promoted_to_fp16 = const()[name = string("op_292_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_302_cast_fp16 = pow(x = x_25_cast_fp16, y = var_292_promoted_to_fp16)[name = string("op_302_cast_fp16")]; tensor var_304_axes_0 = const()[name = string("op_304_axes_0"), val = tensor([-1])]; bool var_304_keep_dims_0 = const()[name = string("op_304_keep_dims_0"), val = bool(true)]; tensor var_304_cast_fp16 = reduce_mean(axes = var_304_axes_0, keep_dims = var_304_keep_dims_0, x = var_302_cast_fp16)[name = string("op_304_cast_fp16")]; fp16 var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_306_cast_fp16 = add(x = var_304_cast_fp16, y = var_305_to_fp16)[name = string("op_306_cast_fp16")]; fp32 norm_9_epsilon_0 = const()[name = string("norm_9_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_9_cast_fp16 = rsqrt(epsilon = norm_9_epsilon_0, x = var_306_cast_fp16)[name = string("norm_9_cast_fp16")]; tensor var_308_cast_fp16 = mul(x = x_25_cast_fp16, y = norm_9_cast_fp16)[name = string("op_308_cast_fp16")]; tensor layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750720)))]; tensor var_309_cast_fp16 = mul(x = var_308_cast_fp16, y = layers_1_input_layernorm_weight_to_fp16)[name = string("op_309_cast_fp16")]; tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850048))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = var_309_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor var_325 = const()[name = string("op_325"), val = tensor([1, 128, 16, 128])]; tensor var_326_cast_fp16 = reshape(shape = var_325, x = linear_7_cast_fp16)[name = string("op_326_cast_fp16")]; tensor x_31_perm_0 = const()[name = string("x_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899264))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = var_309_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_330 = const()[name = string("op_330"), val = tensor([1, 128, 8, 128])]; tensor var_331_cast_fp16 = reshape(shape = var_330, x = linear_8_cast_fp16)[name = string("op_331_cast_fp16")]; tensor x_35_perm_0 = const()[name = string("x_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19948480))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = var_309_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor var_335 = const()[name = string("op_335"), val = tensor([1, 128, 8, 128])]; tensor var_336_cast_fp16 = reshape(shape = var_335, x = linear_9_cast_fp16)[name = string("op_336_cast_fp16")]; tensor transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_292_promoted_1_to_fp16 = const()[name = string("op_292_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_31_cast_fp16 = transpose(perm = x_31_perm_0, x = var_326_cast_fp16)[name = string("transpose_90")]; tensor var_340_cast_fp16 = pow(x = x_31_cast_fp16, y = var_292_promoted_1_to_fp16)[name = string("op_340_cast_fp16")]; tensor var_342_axes_0 = const()[name = string("op_342_axes_0"), val = tensor([-1])]; bool var_342_keep_dims_0 = const()[name = string("op_342_keep_dims_0"), val = bool(true)]; tensor var_342_cast_fp16 = reduce_mean(axes = var_342_axes_0, keep_dims = var_342_keep_dims_0, x = var_340_cast_fp16)[name = string("op_342_cast_fp16")]; fp16 var_343_to_fp16 = const()[name = string("op_343_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_344_cast_fp16 = add(x = var_342_cast_fp16, y = var_343_to_fp16)[name = string("op_344_cast_fp16")]; fp32 norm_11_epsilon_0 = const()[name = string("norm_11_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_11_cast_fp16 = rsqrt(epsilon = norm_11_epsilon_0, x = var_344_cast_fp16)[name = string("norm_11_cast_fp16")]; tensor var_346_cast_fp16 = mul(x = x_31_cast_fp16, y = norm_11_cast_fp16)[name = string("op_346_cast_fp16")]; tensor layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949056)))]; tensor var_347_cast_fp16 = mul(x = var_346_cast_fp16, y = layers_1_self_attn_q_norm_weight_to_fp16)[name = string("op_347_cast_fp16")]; fp16 var_292_promoted_2_to_fp16 = const()[name = string("op_292_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_35_cast_fp16 = transpose(perm = x_35_perm_0, x = var_331_cast_fp16)[name = string("transpose_89")]; tensor var_351_cast_fp16 = pow(x = x_35_cast_fp16, y = var_292_promoted_2_to_fp16)[name = string("op_351_cast_fp16")]; tensor var_353_axes_0 = const()[name = string("op_353_axes_0"), val = tensor([-1])]; bool var_353_keep_dims_0 = const()[name = string("op_353_keep_dims_0"), val = bool(true)]; tensor var_353_cast_fp16 = reduce_mean(axes = var_353_axes_0, keep_dims = var_353_keep_dims_0, x = var_351_cast_fp16)[name = string("op_353_cast_fp16")]; fp16 var_354_to_fp16 = const()[name = string("op_354_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_355_cast_fp16 = add(x = var_353_cast_fp16, y = var_354_to_fp16)[name = string("op_355_cast_fp16")]; fp32 norm_13_epsilon_0 = const()[name = string("norm_13_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_13_cast_fp16 = rsqrt(epsilon = norm_13_epsilon_0, x = var_355_cast_fp16)[name = string("norm_13_cast_fp16")]; tensor var_357_cast_fp16 = mul(x = x_35_cast_fp16, y = norm_13_cast_fp16)[name = string("op_357_cast_fp16")]; tensor layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949376)))]; tensor var_358_cast_fp16 = mul(x = var_357_cast_fp16, y = layers_1_self_attn_k_norm_weight_to_fp16)[name = string("op_358_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_347_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_347_cast_fp16)[name = string("x2_5_cast_fp16")]; tensor var_379_cast_fp16 = mul(x = x1_5_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_379_cast_fp16")]; tensor var_380_cast_fp16 = mul(x = x2_5_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_380_cast_fp16")]; tensor var_381_cast_fp16 = sub(x = var_379_cast_fp16, y = var_380_cast_fp16)[name = string("op_381_cast_fp16")]; tensor var_382_cast_fp16 = mul(x = x2_5_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_382_cast_fp16")]; tensor var_383_cast_fp16 = mul(x = x1_5_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_383_cast_fp16")]; tensor var_384_cast_fp16 = add(x = var_382_cast_fp16, y = var_383_cast_fp16)[name = string("op_384_cast_fp16")]; bool q_3_interleave_0 = const()[name = string("q_3_interleave_0"), val = bool(false)]; tensor q_3_cast_fp16 = concat(axis = var_293, interleave = q_3_interleave_0, values = (var_381_cast_fp16, var_384_cast_fp16))[name = string("q_3_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_358_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_358_cast_fp16)[name = string("x2_7_cast_fp16")]; tensor var_406_cast_fp16 = mul(x = x1_7_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_406_cast_fp16")]; tensor var_407_cast_fp16 = mul(x = x2_7_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_407_cast_fp16")]; tensor var_408_cast_fp16 = sub(x = var_406_cast_fp16, y = var_407_cast_fp16)[name = string("op_408_cast_fp16")]; tensor var_409_cast_fp16 = mul(x = x2_7_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_409_cast_fp16")]; tensor var_410_cast_fp16 = mul(x = x1_7_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_410_cast_fp16")]; tensor var_411_cast_fp16 = add(x = var_409_cast_fp16, y = var_410_cast_fp16)[name = string("op_411_cast_fp16")]; bool var_413_interleave_0 = const()[name = string("op_413_interleave_0"), val = bool(false)]; tensor var_413_cast_fp16 = concat(axis = var_293, interleave = var_413_interleave_0, values = (var_408_cast_fp16, var_411_cast_fp16))[name = string("op_413_cast_fp16")]; tensor transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_22 = const()[name = string("concat_22"), val = tensor([128, 1024])]; tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = var_413_cast_fp16)[name = string("transpose_88")]; tensor reshape_7_cast_fp16 = reshape(shape = concat_22, x = transpose_5_cast_fp16)[name = string("reshape_7_cast_fp16")]; bool matmul_2_transpose_x_1 = const()[name = string("matmul_2_transpose_x_1"), val = bool(true)]; bool matmul_2_transpose_y_1 = const()[name = string("matmul_2_transpose_y_1"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_1, transpose_y = matmul_2_transpose_y_1, x = var_68_to_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor concat_25 = const()[name = string("concat_25"), val = tensor([1024, 1, 8, 128])]; tensor reshape_8_cast_fp16 = reshape(shape = concat_25, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")]; tensor scattered_k_3_perm_0 = const()[name = string("scattered_k_3_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_30 = const()[name = string("concat_30"), val = tensor([128, 1024])]; tensor transpose_57_cast_fp16 = transpose(perm = transpose_57_perm_0, x = var_336_cast_fp16)[name = string("transpose_87")]; tensor reshape_10_cast_fp16 = reshape(shape = concat_30, x = transpose_57_cast_fp16)[name = string("reshape_10_cast_fp16")]; bool matmul_3_transpose_x_1 = const()[name = string("matmul_3_transpose_x_1"), val = bool(true)]; bool matmul_3_transpose_y_1 = const()[name = string("matmul_3_transpose_y_1"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = var_68_to_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor concat_33 = const()[name = string("concat_33"), val = tensor([1024, 1, 8, 128])]; tensor reshape_11_cast_fp16 = reshape(shape = concat_33, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")]; tensor scattered_v_3_perm_0 = const()[name = string("scattered_v_3_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_2 = read_state(input = k_cache_1)[name = string("read_state_2")]; tensor k_cache_9_cast_fp16 = mul(x = read_state_2, y = var_224_cast_fp16)[name = string("k_cache_9_cast_fp16")]; write_state(data = k_cache_9_cast_fp16, input = k_cache_1)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = k_cache_1)[name = string("coreml_update_state_60")]; tensor scattered_k_3_cast_fp16 = transpose(perm = scattered_k_3_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_86")]; tensor k_cache_11_cast_fp16 = add(x = coreml_update_state_60, y = scattered_k_3_cast_fp16)[name = string("k_cache_11_cast_fp16")]; write_state(data = k_cache_11_cast_fp16, input = k_cache_1)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = k_cache_1)[name = string("coreml_update_state_61")]; tensor read_state_3 = read_state(input = v_cache_1)[name = string("read_state_3")]; tensor v_cache_9_cast_fp16 = mul(x = read_state_3, y = var_224_cast_fp16)[name = string("v_cache_9_cast_fp16")]; write_state(data = v_cache_9_cast_fp16, input = v_cache_1)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = v_cache_1)[name = string("coreml_update_state_62")]; tensor scattered_v_3_cast_fp16 = transpose(perm = scattered_v_3_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_85")]; tensor v_cache_11_cast_fp16 = add(x = coreml_update_state_62, y = scattered_v_3_cast_fp16)[name = string("v_cache_11_cast_fp16")]; write_state(data = v_cache_11_cast_fp16, input = v_cache_1)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = v_cache_1)[name = string("coreml_update_state_63")]; tensor var_424_axes_0 = const()[name = string("op_424_axes_0"), val = tensor([2])]; tensor var_424_cast_fp16 = expand_dims(axes = var_424_axes_0, x = coreml_update_state_61)[name = string("op_424_cast_fp16")]; tensor k_exp_5_reps_0 = const()[name = string("k_exp_5_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_5_cast_fp16 = tile(reps = k_exp_5_reps_0, x = var_424_cast_fp16)[name = string("k_exp_5_cast_fp16")]; tensor var_427 = const()[name = string("op_427"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_7_cast_fp16 = reshape(shape = var_427, x = k_exp_5_cast_fp16)[name = string("k_exp_7_cast_fp16")]; tensor var_429_axes_0 = const()[name = string("op_429_axes_0"), val = tensor([2])]; tensor var_429_cast_fp16 = expand_dims(axes = var_429_axes_0, x = coreml_update_state_63)[name = string("op_429_cast_fp16")]; tensor v_exp_5_reps_0 = const()[name = string("v_exp_5_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_5_cast_fp16 = tile(reps = v_exp_5_reps_0, x = var_429_cast_fp16)[name = string("v_exp_5_cast_fp16")]; tensor var_432 = const()[name = string("op_432"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_7_cast_fp16 = reshape(shape = var_432, x = v_exp_5_cast_fp16)[name = string("v_exp_7_cast_fp16")]; bool var_435_transpose_x_1 = const()[name = string("op_435_transpose_x_1"), val = bool(false)]; bool var_435_transpose_y_1 = const()[name = string("op_435_transpose_y_1"), val = bool(true)]; tensor var_435_cast_fp16 = matmul(transpose_x = var_435_transpose_x_1, transpose_y = var_435_transpose_y_1, x = q_3_cast_fp16, y = k_exp_7_cast_fp16)[name = string("op_435_cast_fp16")]; fp16 var_436_to_fp16 = const()[name = string("op_436_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_5_cast_fp16 = mul(x = var_435_cast_fp16, y = var_436_to_fp16)[name = string("attn_5_cast_fp16")]; tensor input_11_cast_fp16 = add(x = attn_5_cast_fp16, y = attention_mask_to_fp16)[name = string("input_11_cast_fp16")]; tensor attn_7_cast_fp16 = softmax(axis = var_293, x = input_11_cast_fp16)[name = string("attn_7_cast_fp16")]; bool out_3_transpose_x_0 = const()[name = string("out_3_transpose_x_0"), val = bool(false)]; bool out_3_transpose_y_0 = const()[name = string("out_3_transpose_y_0"), val = bool(false)]; tensor out_3_cast_fp16 = matmul(transpose_x = out_3_transpose_x_0, transpose_y = out_3_transpose_y_0, x = attn_7_cast_fp16, y = v_exp_7_cast_fp16)[name = string("out_3_cast_fp16")]; tensor var_441_perm_0 = const()[name = string("op_441_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_442 = const()[name = string("op_442"), val = tensor([1, 128, -1])]; tensor var_441_cast_fp16 = transpose(perm = var_441_perm_0, x = out_3_cast_fp16)[name = string("transpose_84")]; tensor input_13_cast_fp16 = reshape(shape = var_442, x = var_441_cast_fp16)[name = string("input_13_cast_fp16")]; tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22046912))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor x_45_cast_fp16 = add(x = x_25_cast_fp16, y = linear_10_cast_fp16)[name = string("x_45_cast_fp16")]; fp16 var_292_promoted_3_to_fp16 = const()[name = string("op_292_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_449_cast_fp16 = pow(x = x_45_cast_fp16, y = var_292_promoted_3_to_fp16)[name = string("op_449_cast_fp16")]; tensor var_451_axes_0 = const()[name = string("op_451_axes_0"), val = tensor([-1])]; bool var_451_keep_dims_0 = const()[name = string("op_451_keep_dims_0"), val = bool(true)]; tensor var_451_cast_fp16 = reduce_mean(axes = var_451_axes_0, keep_dims = var_451_keep_dims_0, x = var_449_cast_fp16)[name = string("op_451_cast_fp16")]; fp16 var_452_to_fp16 = const()[name = string("op_452_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_453_cast_fp16 = add(x = var_451_cast_fp16, y = var_452_to_fp16)[name = string("op_453_cast_fp16")]; fp32 norm_15_epsilon_0 = const()[name = string("norm_15_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_15_cast_fp16 = rsqrt(epsilon = norm_15_epsilon_0, x = var_453_cast_fp16)[name = string("norm_15_cast_fp16")]; tensor var_455_cast_fp16 = mul(x = x_45_cast_fp16, y = norm_15_cast_fp16)[name = string("op_455_cast_fp16")]; tensor layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22047488)))]; tensor var_456_cast_fp16 = mul(x = var_455_cast_fp16, y = layers_1_post_attention_layernorm_weight_to_fp16)[name = string("op_456_cast_fp16")]; tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22049600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195392))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = var_456_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_466_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_466_cast_fp16")]; tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28341760))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = var_456_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_19_cast_fp16 = mul(x = var_466_cast_fp16, y = linear_12_cast_fp16)[name = string("input_19_cast_fp16")]; tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28342336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488128))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_13_cast_fp16)[name = string("x_51_cast_fp16")]; int32 var_487 = const()[name = string("op_487"), val = int32(-1)]; fp16 var_486_promoted_to_fp16 = const()[name = string("op_486_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_496_cast_fp16 = pow(x = x_51_cast_fp16, y = var_486_promoted_to_fp16)[name = string("op_496_cast_fp16")]; tensor var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor([-1])]; bool var_498_keep_dims_0 = const()[name = string("op_498_keep_dims_0"), val = bool(true)]; tensor var_498_cast_fp16 = reduce_mean(axes = var_498_axes_0, keep_dims = var_498_keep_dims_0, x = var_496_cast_fp16)[name = string("op_498_cast_fp16")]; fp16 var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_500_cast_fp16 = add(x = var_498_cast_fp16, y = var_499_to_fp16)[name = string("op_500_cast_fp16")]; fp32 norm_17_epsilon_0 = const()[name = string("norm_17_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_17_cast_fp16 = rsqrt(epsilon = norm_17_epsilon_0, x = var_500_cast_fp16)[name = string("norm_17_cast_fp16")]; tensor var_502_cast_fp16 = mul(x = x_51_cast_fp16, y = norm_17_cast_fp16)[name = string("op_502_cast_fp16")]; tensor layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488704)))]; tensor var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = layers_2_input_layernorm_weight_to_fp16)[name = string("op_503_cast_fp16")]; tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588032))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = var_503_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor var_519 = const()[name = string("op_519"), val = tensor([1, 128, 16, 128])]; tensor var_520_cast_fp16 = reshape(shape = var_519, x = linear_14_cast_fp16)[name = string("op_520_cast_fp16")]; tensor x_57_perm_0 = const()[name = string("x_57_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637248))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = var_503_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor var_524 = const()[name = string("op_524"), val = tensor([1, 128, 8, 128])]; tensor var_525_cast_fp16 = reshape(shape = var_524, x = linear_15_cast_fp16)[name = string("op_525_cast_fp16")]; tensor x_61_perm_0 = const()[name = string("x_61_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35686464))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = var_503_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_529 = const()[name = string("op_529"), val = tensor([1, 128, 8, 128])]; tensor var_530_cast_fp16 = reshape(shape = var_529, x = linear_16_cast_fp16)[name = string("op_530_cast_fp16")]; tensor transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_486_promoted_1_to_fp16 = const()[name = string("op_486_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_57_cast_fp16 = transpose(perm = x_57_perm_0, x = var_520_cast_fp16)[name = string("transpose_83")]; tensor var_534_cast_fp16 = pow(x = x_57_cast_fp16, y = var_486_promoted_1_to_fp16)[name = string("op_534_cast_fp16")]; tensor var_536_axes_0 = const()[name = string("op_536_axes_0"), val = tensor([-1])]; bool var_536_keep_dims_0 = const()[name = string("op_536_keep_dims_0"), val = bool(true)]; tensor var_536_cast_fp16 = reduce_mean(axes = var_536_axes_0, keep_dims = var_536_keep_dims_0, x = var_534_cast_fp16)[name = string("op_536_cast_fp16")]; fp16 var_537_to_fp16 = const()[name = string("op_537_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_538_cast_fp16 = add(x = var_536_cast_fp16, y = var_537_to_fp16)[name = string("op_538_cast_fp16")]; fp32 norm_19_epsilon_0 = const()[name = string("norm_19_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_19_cast_fp16 = rsqrt(epsilon = norm_19_epsilon_0, x = var_538_cast_fp16)[name = string("norm_19_cast_fp16")]; tensor var_540_cast_fp16 = mul(x = x_57_cast_fp16, y = norm_19_cast_fp16)[name = string("op_540_cast_fp16")]; tensor layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687040)))]; tensor var_541_cast_fp16 = mul(x = var_540_cast_fp16, y = layers_2_self_attn_q_norm_weight_to_fp16)[name = string("op_541_cast_fp16")]; fp16 var_486_promoted_2_to_fp16 = const()[name = string("op_486_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_61_cast_fp16 = transpose(perm = x_61_perm_0, x = var_525_cast_fp16)[name = string("transpose_82")]; tensor var_545_cast_fp16 = pow(x = x_61_cast_fp16, y = var_486_promoted_2_to_fp16)[name = string("op_545_cast_fp16")]; tensor var_547_axes_0 = const()[name = string("op_547_axes_0"), val = tensor([-1])]; bool var_547_keep_dims_0 = const()[name = string("op_547_keep_dims_0"), val = bool(true)]; tensor var_547_cast_fp16 = reduce_mean(axes = var_547_axes_0, keep_dims = var_547_keep_dims_0, x = var_545_cast_fp16)[name = string("op_547_cast_fp16")]; fp16 var_548_to_fp16 = const()[name = string("op_548_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_549_cast_fp16 = add(x = var_547_cast_fp16, y = var_548_to_fp16)[name = string("op_549_cast_fp16")]; fp32 norm_21_epsilon_0 = const()[name = string("norm_21_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_21_cast_fp16 = rsqrt(epsilon = norm_21_epsilon_0, x = var_549_cast_fp16)[name = string("norm_21_cast_fp16")]; tensor var_551_cast_fp16 = mul(x = x_61_cast_fp16, y = norm_21_cast_fp16)[name = string("op_551_cast_fp16")]; tensor layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687360)))]; tensor var_552_cast_fp16 = mul(x = var_551_cast_fp16, y = layers_2_self_attn_k_norm_weight_to_fp16)[name = string("op_552_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_541_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_541_cast_fp16)[name = string("x2_9_cast_fp16")]; tensor var_573_cast_fp16 = mul(x = x1_9_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_573_cast_fp16")]; tensor var_574_cast_fp16 = mul(x = x2_9_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_574_cast_fp16")]; tensor var_575_cast_fp16 = sub(x = var_573_cast_fp16, y = var_574_cast_fp16)[name = string("op_575_cast_fp16")]; tensor var_576_cast_fp16 = mul(x = x2_9_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_576_cast_fp16")]; tensor var_577_cast_fp16 = mul(x = x1_9_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_577_cast_fp16")]; tensor var_578_cast_fp16 = add(x = var_576_cast_fp16, y = var_577_cast_fp16)[name = string("op_578_cast_fp16")]; bool q_5_interleave_0 = const()[name = string("q_5_interleave_0"), val = bool(false)]; tensor q_5_cast_fp16 = concat(axis = var_487, interleave = q_5_interleave_0, values = (var_575_cast_fp16, var_578_cast_fp16))[name = string("q_5_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_552_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_552_cast_fp16)[name = string("x2_11_cast_fp16")]; tensor var_600_cast_fp16 = mul(x = x1_11_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_600_cast_fp16")]; tensor var_601_cast_fp16 = mul(x = x2_11_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_601_cast_fp16")]; tensor var_602_cast_fp16 = sub(x = var_600_cast_fp16, y = var_601_cast_fp16)[name = string("op_602_cast_fp16")]; tensor var_603_cast_fp16 = mul(x = x2_11_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_603_cast_fp16")]; tensor var_604_cast_fp16 = mul(x = x1_11_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_604_cast_fp16")]; tensor var_605_cast_fp16 = add(x = var_603_cast_fp16, y = var_604_cast_fp16)[name = string("op_605_cast_fp16")]; bool var_607_interleave_0 = const()[name = string("op_607_interleave_0"), val = bool(false)]; tensor var_607_cast_fp16 = concat(axis = var_487, interleave = var_607_interleave_0, values = (var_602_cast_fp16, var_605_cast_fp16))[name = string("op_607_cast_fp16")]; tensor transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_40 = const()[name = string("concat_40"), val = tensor([128, 1024])]; tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = var_607_cast_fp16)[name = string("transpose_81")]; tensor reshape_13_cast_fp16 = reshape(shape = concat_40, x = transpose_9_cast_fp16)[name = string("reshape_13_cast_fp16")]; bool matmul_4_transpose_x_1 = const()[name = string("matmul_4_transpose_x_1"), val = bool(true)]; bool matmul_4_transpose_y_1 = const()[name = string("matmul_4_transpose_y_1"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_1, transpose_y = matmul_4_transpose_y_1, x = var_68_to_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor concat_43 = const()[name = string("concat_43"), val = tensor([1024, 1, 8, 128])]; tensor reshape_14_cast_fp16 = reshape(shape = concat_43, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")]; tensor scattered_k_5_perm_0 = const()[name = string("scattered_k_5_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_48 = const()[name = string("concat_48"), val = tensor([128, 1024])]; tensor transpose_58_cast_fp16 = transpose(perm = transpose_58_perm_0, x = var_530_cast_fp16)[name = string("transpose_80")]; tensor reshape_16_cast_fp16 = reshape(shape = concat_48, x = transpose_58_cast_fp16)[name = string("reshape_16_cast_fp16")]; bool matmul_5_transpose_x_1 = const()[name = string("matmul_5_transpose_x_1"), val = bool(true)]; bool matmul_5_transpose_y_1 = const()[name = string("matmul_5_transpose_y_1"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_1, transpose_y = matmul_5_transpose_y_1, x = var_68_to_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor concat_51 = const()[name = string("concat_51"), val = tensor([1024, 1, 8, 128])]; tensor reshape_17_cast_fp16 = reshape(shape = concat_51, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")]; tensor scattered_v_5_perm_0 = const()[name = string("scattered_v_5_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_4 = read_state(input = k_cache_2)[name = string("read_state_4")]; tensor k_cache_15_cast_fp16 = mul(x = read_state_4, y = var_224_cast_fp16)[name = string("k_cache_15_cast_fp16")]; write_state(data = k_cache_15_cast_fp16, input = k_cache_2)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = k_cache_2)[name = string("coreml_update_state_64")]; tensor scattered_k_5_cast_fp16 = transpose(perm = scattered_k_5_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_79")]; tensor k_cache_17_cast_fp16 = add(x = coreml_update_state_64, y = scattered_k_5_cast_fp16)[name = string("k_cache_17_cast_fp16")]; write_state(data = k_cache_17_cast_fp16, input = k_cache_2)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = k_cache_2)[name = string("coreml_update_state_65")]; tensor read_state_5 = read_state(input = v_cache_2)[name = string("read_state_5")]; tensor v_cache_15_cast_fp16 = mul(x = read_state_5, y = var_224_cast_fp16)[name = string("v_cache_15_cast_fp16")]; write_state(data = v_cache_15_cast_fp16, input = v_cache_2)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = v_cache_2)[name = string("coreml_update_state_66")]; tensor scattered_v_5_cast_fp16 = transpose(perm = scattered_v_5_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_78")]; tensor v_cache_17_cast_fp16 = add(x = coreml_update_state_66, y = scattered_v_5_cast_fp16)[name = string("v_cache_17_cast_fp16")]; write_state(data = v_cache_17_cast_fp16, input = v_cache_2)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = v_cache_2)[name = string("coreml_update_state_67")]; tensor var_618_axes_0 = const()[name = string("op_618_axes_0"), val = tensor([2])]; tensor var_618_cast_fp16 = expand_dims(axes = var_618_axes_0, x = coreml_update_state_65)[name = string("op_618_cast_fp16")]; tensor k_exp_9_reps_0 = const()[name = string("k_exp_9_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_9_cast_fp16 = tile(reps = k_exp_9_reps_0, x = var_618_cast_fp16)[name = string("k_exp_9_cast_fp16")]; tensor var_621 = const()[name = string("op_621"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_11_cast_fp16 = reshape(shape = var_621, x = k_exp_9_cast_fp16)[name = string("k_exp_11_cast_fp16")]; tensor var_623_axes_0 = const()[name = string("op_623_axes_0"), val = tensor([2])]; tensor var_623_cast_fp16 = expand_dims(axes = var_623_axes_0, x = coreml_update_state_67)[name = string("op_623_cast_fp16")]; tensor v_exp_9_reps_0 = const()[name = string("v_exp_9_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_9_cast_fp16 = tile(reps = v_exp_9_reps_0, x = var_623_cast_fp16)[name = string("v_exp_9_cast_fp16")]; tensor var_626 = const()[name = string("op_626"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_11_cast_fp16 = reshape(shape = var_626, x = v_exp_9_cast_fp16)[name = string("v_exp_11_cast_fp16")]; bool var_629_transpose_x_1 = const()[name = string("op_629_transpose_x_1"), val = bool(false)]; bool var_629_transpose_y_1 = const()[name = string("op_629_transpose_y_1"), val = bool(true)]; tensor var_629_cast_fp16 = matmul(transpose_x = var_629_transpose_x_1, transpose_y = var_629_transpose_y_1, x = q_5_cast_fp16, y = k_exp_11_cast_fp16)[name = string("op_629_cast_fp16")]; fp16 var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_9_cast_fp16 = mul(x = var_629_cast_fp16, y = var_630_to_fp16)[name = string("attn_9_cast_fp16")]; tensor input_21_cast_fp16 = add(x = attn_9_cast_fp16, y = attention_mask_to_fp16)[name = string("input_21_cast_fp16")]; tensor attn_11_cast_fp16 = softmax(axis = var_487, x = input_21_cast_fp16)[name = string("attn_11_cast_fp16")]; bool out_5_transpose_x_0 = const()[name = string("out_5_transpose_x_0"), val = bool(false)]; bool out_5_transpose_y_0 = const()[name = string("out_5_transpose_y_0"), val = bool(false)]; tensor out_5_cast_fp16 = matmul(transpose_x = out_5_transpose_x_0, transpose_y = out_5_transpose_y_0, x = attn_11_cast_fp16, y = v_exp_11_cast_fp16)[name = string("out_5_cast_fp16")]; tensor var_635_perm_0 = const()[name = string("op_635_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_636 = const()[name = string("op_636"), val = tensor([1, 128, -1])]; tensor var_635_cast_fp16 = transpose(perm = var_635_perm_0, x = out_5_cast_fp16)[name = string("transpose_77")]; tensor input_23_cast_fp16 = reshape(shape = var_636, x = var_635_cast_fp16)[name = string("input_23_cast_fp16")]; tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37784896))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor x_71_cast_fp16 = add(x = x_51_cast_fp16, y = linear_17_cast_fp16)[name = string("x_71_cast_fp16")]; fp16 var_486_promoted_3_to_fp16 = const()[name = string("op_486_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_643_cast_fp16 = pow(x = x_71_cast_fp16, y = var_486_promoted_3_to_fp16)[name = string("op_643_cast_fp16")]; tensor var_645_axes_0 = const()[name = string("op_645_axes_0"), val = tensor([-1])]; bool var_645_keep_dims_0 = const()[name = string("op_645_keep_dims_0"), val = bool(true)]; tensor var_645_cast_fp16 = reduce_mean(axes = var_645_axes_0, keep_dims = var_645_keep_dims_0, x = var_643_cast_fp16)[name = string("op_645_cast_fp16")]; fp16 var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_647_cast_fp16 = add(x = var_645_cast_fp16, y = var_646_to_fp16)[name = string("op_647_cast_fp16")]; fp32 norm_23_epsilon_0 = const()[name = string("norm_23_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_23_cast_fp16 = rsqrt(epsilon = norm_23_epsilon_0, x = var_647_cast_fp16)[name = string("norm_23_cast_fp16")]; tensor var_649_cast_fp16 = mul(x = x_71_cast_fp16, y = norm_23_cast_fp16)[name = string("op_649_cast_fp16")]; tensor layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37785472)))]; tensor var_650_cast_fp16 = mul(x = var_649_cast_fp16, y = layers_2_post_attention_layernorm_weight_to_fp16)[name = string("op_650_cast_fp16")]; tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37787584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933376))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = var_650_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_660_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_660_cast_fp16")]; tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44079744))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = var_650_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_29_cast_fp16 = mul(x = var_660_cast_fp16, y = linear_19_cast_fp16)[name = string("input_29_cast_fp16")]; tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44080320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226112))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor x_77_cast_fp16 = add(x = x_71_cast_fp16, y = linear_20_cast_fp16)[name = string("x_77_cast_fp16")]; int32 var_681 = const()[name = string("op_681"), val = int32(-1)]; fp16 var_680_promoted_to_fp16 = const()[name = string("op_680_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_690_cast_fp16 = pow(x = x_77_cast_fp16, y = var_680_promoted_to_fp16)[name = string("op_690_cast_fp16")]; tensor var_692_axes_0 = const()[name = string("op_692_axes_0"), val = tensor([-1])]; bool var_692_keep_dims_0 = const()[name = string("op_692_keep_dims_0"), val = bool(true)]; tensor var_692_cast_fp16 = reduce_mean(axes = var_692_axes_0, keep_dims = var_692_keep_dims_0, x = var_690_cast_fp16)[name = string("op_692_cast_fp16")]; fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_694_cast_fp16 = add(x = var_692_cast_fp16, y = var_693_to_fp16)[name = string("op_694_cast_fp16")]; fp32 norm_25_epsilon_0 = const()[name = string("norm_25_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_25_cast_fp16 = rsqrt(epsilon = norm_25_epsilon_0, x = var_694_cast_fp16)[name = string("norm_25_cast_fp16")]; tensor var_696_cast_fp16 = mul(x = x_77_cast_fp16, y = norm_25_cast_fp16)[name = string("op_696_cast_fp16")]; tensor layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226688)))]; tensor var_697_cast_fp16 = mul(x = var_696_cast_fp16, y = layers_3_input_layernorm_weight_to_fp16)[name = string("op_697_cast_fp16")]; tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326016))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = var_697_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor var_713 = const()[name = string("op_713"), val = tensor([1, 128, 16, 128])]; tensor var_714_cast_fp16 = reshape(shape = var_713, x = linear_21_cast_fp16)[name = string("op_714_cast_fp16")]; tensor x_83_perm_0 = const()[name = string("x_83_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375232))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = var_697_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor var_718 = const()[name = string("op_718"), val = tensor([1, 128, 8, 128])]; tensor var_719_cast_fp16 = reshape(shape = var_718, x = linear_22_cast_fp16)[name = string("op_719_cast_fp16")]; tensor x_87_perm_0 = const()[name = string("x_87_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51424448))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = var_697_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor var_723 = const()[name = string("op_723"), val = tensor([1, 128, 8, 128])]; tensor var_724_cast_fp16 = reshape(shape = var_723, x = linear_23_cast_fp16)[name = string("op_724_cast_fp16")]; tensor transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_680_promoted_1_to_fp16 = const()[name = string("op_680_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_83_cast_fp16 = transpose(perm = x_83_perm_0, x = var_714_cast_fp16)[name = string("transpose_76")]; tensor var_728_cast_fp16 = pow(x = x_83_cast_fp16, y = var_680_promoted_1_to_fp16)[name = string("op_728_cast_fp16")]; tensor var_730_axes_0 = const()[name = string("op_730_axes_0"), val = tensor([-1])]; bool var_730_keep_dims_0 = const()[name = string("op_730_keep_dims_0"), val = bool(true)]; tensor var_730_cast_fp16 = reduce_mean(axes = var_730_axes_0, keep_dims = var_730_keep_dims_0, x = var_728_cast_fp16)[name = string("op_730_cast_fp16")]; fp16 var_731_to_fp16 = const()[name = string("op_731_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_732_cast_fp16 = add(x = var_730_cast_fp16, y = var_731_to_fp16)[name = string("op_732_cast_fp16")]; fp32 norm_27_epsilon_0 = const()[name = string("norm_27_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_27_cast_fp16 = rsqrt(epsilon = norm_27_epsilon_0, x = var_732_cast_fp16)[name = string("norm_27_cast_fp16")]; tensor var_734_cast_fp16 = mul(x = x_83_cast_fp16, y = norm_27_cast_fp16)[name = string("op_734_cast_fp16")]; tensor layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425024)))]; tensor var_735_cast_fp16 = mul(x = var_734_cast_fp16, y = layers_3_self_attn_q_norm_weight_to_fp16)[name = string("op_735_cast_fp16")]; fp16 var_680_promoted_2_to_fp16 = const()[name = string("op_680_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_87_cast_fp16 = transpose(perm = x_87_perm_0, x = var_719_cast_fp16)[name = string("transpose_75")]; tensor var_739_cast_fp16 = pow(x = x_87_cast_fp16, y = var_680_promoted_2_to_fp16)[name = string("op_739_cast_fp16")]; tensor var_741_axes_0 = const()[name = string("op_741_axes_0"), val = tensor([-1])]; bool var_741_keep_dims_0 = const()[name = string("op_741_keep_dims_0"), val = bool(true)]; tensor var_741_cast_fp16 = reduce_mean(axes = var_741_axes_0, keep_dims = var_741_keep_dims_0, x = var_739_cast_fp16)[name = string("op_741_cast_fp16")]; fp16 var_742_to_fp16 = const()[name = string("op_742_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_743_cast_fp16 = add(x = var_741_cast_fp16, y = var_742_to_fp16)[name = string("op_743_cast_fp16")]; fp32 norm_29_epsilon_0 = const()[name = string("norm_29_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_29_cast_fp16 = rsqrt(epsilon = norm_29_epsilon_0, x = var_743_cast_fp16)[name = string("norm_29_cast_fp16")]; tensor var_745_cast_fp16 = mul(x = x_87_cast_fp16, y = norm_29_cast_fp16)[name = string("op_745_cast_fp16")]; tensor layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425344)))]; tensor var_746_cast_fp16 = mul(x = var_745_cast_fp16, y = layers_3_self_attn_k_norm_weight_to_fp16)[name = string("op_746_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_735_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_735_cast_fp16)[name = string("x2_13_cast_fp16")]; tensor var_767_cast_fp16 = mul(x = x1_13_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_767_cast_fp16")]; tensor var_768_cast_fp16 = mul(x = x2_13_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_768_cast_fp16")]; tensor var_769_cast_fp16 = sub(x = var_767_cast_fp16, y = var_768_cast_fp16)[name = string("op_769_cast_fp16")]; tensor var_770_cast_fp16 = mul(x = x2_13_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_770_cast_fp16")]; tensor var_771_cast_fp16 = mul(x = x1_13_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_771_cast_fp16")]; tensor var_772_cast_fp16 = add(x = var_770_cast_fp16, y = var_771_cast_fp16)[name = string("op_772_cast_fp16")]; bool q_7_interleave_0 = const()[name = string("q_7_interleave_0"), val = bool(false)]; tensor q_7_cast_fp16 = concat(axis = var_681, interleave = q_7_interleave_0, values = (var_769_cast_fp16, var_772_cast_fp16))[name = string("q_7_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_746_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_746_cast_fp16)[name = string("x2_15_cast_fp16")]; tensor var_794_cast_fp16 = mul(x = x1_15_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_794_cast_fp16")]; tensor var_795_cast_fp16 = mul(x = x2_15_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_795_cast_fp16")]; tensor var_796_cast_fp16 = sub(x = var_794_cast_fp16, y = var_795_cast_fp16)[name = string("op_796_cast_fp16")]; tensor var_797_cast_fp16 = mul(x = x2_15_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_797_cast_fp16")]; tensor var_798_cast_fp16 = mul(x = x1_15_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_798_cast_fp16")]; tensor var_799_cast_fp16 = add(x = var_797_cast_fp16, y = var_798_cast_fp16)[name = string("op_799_cast_fp16")]; bool var_801_interleave_0 = const()[name = string("op_801_interleave_0"), val = bool(false)]; tensor var_801_cast_fp16 = concat(axis = var_681, interleave = var_801_interleave_0, values = (var_796_cast_fp16, var_799_cast_fp16))[name = string("op_801_cast_fp16")]; tensor transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_58 = const()[name = string("concat_58"), val = tensor([128, 1024])]; tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = var_801_cast_fp16)[name = string("transpose_74")]; tensor reshape_19_cast_fp16 = reshape(shape = concat_58, x = transpose_13_cast_fp16)[name = string("reshape_19_cast_fp16")]; bool matmul_6_transpose_x_1 = const()[name = string("matmul_6_transpose_x_1"), val = bool(true)]; bool matmul_6_transpose_y_1 = const()[name = string("matmul_6_transpose_y_1"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_1, transpose_y = matmul_6_transpose_y_1, x = var_68_to_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor concat_61 = const()[name = string("concat_61"), val = tensor([1024, 1, 8, 128])]; tensor reshape_20_cast_fp16 = reshape(shape = concat_61, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")]; tensor scattered_k_7_perm_0 = const()[name = string("scattered_k_7_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_66 = const()[name = string("concat_66"), val = tensor([128, 1024])]; tensor transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = var_724_cast_fp16)[name = string("transpose_73")]; tensor reshape_22_cast_fp16 = reshape(shape = concat_66, x = transpose_59_cast_fp16)[name = string("reshape_22_cast_fp16")]; bool matmul_7_transpose_x_1 = const()[name = string("matmul_7_transpose_x_1"), val = bool(true)]; bool matmul_7_transpose_y_1 = const()[name = string("matmul_7_transpose_y_1"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_1, transpose_y = matmul_7_transpose_y_1, x = var_68_to_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor concat_69 = const()[name = string("concat_69"), val = tensor([1024, 1, 8, 128])]; tensor reshape_23_cast_fp16 = reshape(shape = concat_69, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")]; tensor scattered_v_7_perm_0 = const()[name = string("scattered_v_7_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_6 = read_state(input = k_cache_3)[name = string("read_state_6")]; tensor k_cache_21_cast_fp16 = mul(x = read_state_6, y = var_224_cast_fp16)[name = string("k_cache_21_cast_fp16")]; write_state(data = k_cache_21_cast_fp16, input = k_cache_3)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = k_cache_3)[name = string("coreml_update_state_68")]; tensor scattered_k_7_cast_fp16 = transpose(perm = scattered_k_7_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_72")]; tensor k_cache_23_cast_fp16 = add(x = coreml_update_state_68, y = scattered_k_7_cast_fp16)[name = string("k_cache_23_cast_fp16")]; write_state(data = k_cache_23_cast_fp16, input = k_cache_3)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = k_cache_3)[name = string("coreml_update_state_69")]; tensor read_state_7 = read_state(input = v_cache_3)[name = string("read_state_7")]; tensor v_cache_21_cast_fp16 = mul(x = read_state_7, y = var_224_cast_fp16)[name = string("v_cache_21_cast_fp16")]; write_state(data = v_cache_21_cast_fp16, input = v_cache_3)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = v_cache_3)[name = string("coreml_update_state_70")]; tensor scattered_v_7_cast_fp16 = transpose(perm = scattered_v_7_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_71")]; tensor v_cache_23_cast_fp16 = add(x = coreml_update_state_70, y = scattered_v_7_cast_fp16)[name = string("v_cache_23_cast_fp16")]; write_state(data = v_cache_23_cast_fp16, input = v_cache_3)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = v_cache_3)[name = string("coreml_update_state_71")]; tensor var_812_axes_0 = const()[name = string("op_812_axes_0"), val = tensor([2])]; tensor var_812_cast_fp16 = expand_dims(axes = var_812_axes_0, x = coreml_update_state_69)[name = string("op_812_cast_fp16")]; tensor k_exp_13_reps_0 = const()[name = string("k_exp_13_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_13_cast_fp16 = tile(reps = k_exp_13_reps_0, x = var_812_cast_fp16)[name = string("k_exp_13_cast_fp16")]; tensor var_815 = const()[name = string("op_815"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_15_cast_fp16 = reshape(shape = var_815, x = k_exp_13_cast_fp16)[name = string("k_exp_15_cast_fp16")]; tensor var_817_axes_0 = const()[name = string("op_817_axes_0"), val = tensor([2])]; tensor var_817_cast_fp16 = expand_dims(axes = var_817_axes_0, x = coreml_update_state_71)[name = string("op_817_cast_fp16")]; tensor v_exp_13_reps_0 = const()[name = string("v_exp_13_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_13_cast_fp16 = tile(reps = v_exp_13_reps_0, x = var_817_cast_fp16)[name = string("v_exp_13_cast_fp16")]; tensor var_820 = const()[name = string("op_820"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_15_cast_fp16 = reshape(shape = var_820, x = v_exp_13_cast_fp16)[name = string("v_exp_15_cast_fp16")]; bool var_823_transpose_x_1 = const()[name = string("op_823_transpose_x_1"), val = bool(false)]; bool var_823_transpose_y_1 = const()[name = string("op_823_transpose_y_1"), val = bool(true)]; tensor var_823_cast_fp16 = matmul(transpose_x = var_823_transpose_x_1, transpose_y = var_823_transpose_y_1, x = q_7_cast_fp16, y = k_exp_15_cast_fp16)[name = string("op_823_cast_fp16")]; fp16 var_824_to_fp16 = const()[name = string("op_824_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_13_cast_fp16 = mul(x = var_823_cast_fp16, y = var_824_to_fp16)[name = string("attn_13_cast_fp16")]; tensor input_31_cast_fp16 = add(x = attn_13_cast_fp16, y = attention_mask_to_fp16)[name = string("input_31_cast_fp16")]; tensor attn_15_cast_fp16 = softmax(axis = var_681, x = input_31_cast_fp16)[name = string("attn_15_cast_fp16")]; bool out_7_transpose_x_0 = const()[name = string("out_7_transpose_x_0"), val = bool(false)]; bool out_7_transpose_y_0 = const()[name = string("out_7_transpose_y_0"), val = bool(false)]; tensor out_7_cast_fp16 = matmul(transpose_x = out_7_transpose_x_0, transpose_y = out_7_transpose_y_0, x = attn_15_cast_fp16, y = v_exp_15_cast_fp16)[name = string("out_7_cast_fp16")]; tensor var_829_perm_0 = const()[name = string("op_829_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_830 = const()[name = string("op_830"), val = tensor([1, 128, -1])]; tensor var_829_cast_fp16 = transpose(perm = var_829_perm_0, x = out_7_cast_fp16)[name = string("transpose_70")]; tensor input_33_cast_fp16 = reshape(shape = var_830, x = var_829_cast_fp16)[name = string("input_33_cast_fp16")]; tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53522880))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor x_97_cast_fp16 = add(x = x_77_cast_fp16, y = linear_24_cast_fp16)[name = string("x_97_cast_fp16")]; fp16 var_680_promoted_3_to_fp16 = const()[name = string("op_680_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_837_cast_fp16 = pow(x = x_97_cast_fp16, y = var_680_promoted_3_to_fp16)[name = string("op_837_cast_fp16")]; tensor var_839_axes_0 = const()[name = string("op_839_axes_0"), val = tensor([-1])]; bool var_839_keep_dims_0 = const()[name = string("op_839_keep_dims_0"), val = bool(true)]; tensor var_839_cast_fp16 = reduce_mean(axes = var_839_axes_0, keep_dims = var_839_keep_dims_0, x = var_837_cast_fp16)[name = string("op_839_cast_fp16")]; fp16 var_840_to_fp16 = const()[name = string("op_840_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_841_cast_fp16 = add(x = var_839_cast_fp16, y = var_840_to_fp16)[name = string("op_841_cast_fp16")]; fp32 norm_31_epsilon_0 = const()[name = string("norm_31_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_31_cast_fp16 = rsqrt(epsilon = norm_31_epsilon_0, x = var_841_cast_fp16)[name = string("norm_31_cast_fp16")]; tensor var_843_cast_fp16 = mul(x = x_97_cast_fp16, y = norm_31_cast_fp16)[name = string("op_843_cast_fp16")]; tensor layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53523456)))]; tensor var_844_cast_fp16 = mul(x = var_843_cast_fp16, y = layers_3_post_attention_layernorm_weight_to_fp16)[name = string("op_844_cast_fp16")]; tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53525568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671360))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = var_844_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_854_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_854_cast_fp16")]; tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59817728))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = var_844_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_39_cast_fp16 = mul(x = var_854_cast_fp16, y = linear_26_cast_fp16)[name = string("input_39_cast_fp16")]; tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59818304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964096))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_27_cast_fp16)[name = string("x_103_cast_fp16")]; int32 var_875 = const()[name = string("op_875"), val = int32(-1)]; fp16 var_874_promoted_to_fp16 = const()[name = string("op_874_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_884_cast_fp16 = pow(x = x_103_cast_fp16, y = var_874_promoted_to_fp16)[name = string("op_884_cast_fp16")]; tensor var_886_axes_0 = const()[name = string("op_886_axes_0"), val = tensor([-1])]; bool var_886_keep_dims_0 = const()[name = string("op_886_keep_dims_0"), val = bool(true)]; tensor var_886_cast_fp16 = reduce_mean(axes = var_886_axes_0, keep_dims = var_886_keep_dims_0, x = var_884_cast_fp16)[name = string("op_886_cast_fp16")]; fp16 var_887_to_fp16 = const()[name = string("op_887_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_888_cast_fp16 = add(x = var_886_cast_fp16, y = var_887_to_fp16)[name = string("op_888_cast_fp16")]; fp32 norm_33_epsilon_0 = const()[name = string("norm_33_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_33_cast_fp16 = rsqrt(epsilon = norm_33_epsilon_0, x = var_888_cast_fp16)[name = string("norm_33_cast_fp16")]; tensor var_890_cast_fp16 = mul(x = x_103_cast_fp16, y = norm_33_cast_fp16)[name = string("op_890_cast_fp16")]; tensor layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964672)))]; tensor var_891_cast_fp16 = mul(x = var_890_cast_fp16, y = layers_4_input_layernorm_weight_to_fp16)[name = string("op_891_cast_fp16")]; tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064000))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = var_891_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor var_907 = const()[name = string("op_907"), val = tensor([1, 128, 16, 128])]; tensor var_908_cast_fp16 = reshape(shape = var_907, x = linear_28_cast_fp16)[name = string("op_908_cast_fp16")]; tensor x_109_perm_0 = const()[name = string("x_109_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113216))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = var_891_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor var_912 = const()[name = string("op_912"), val = tensor([1, 128, 8, 128])]; tensor var_913_cast_fp16 = reshape(shape = var_912, x = linear_29_cast_fp16)[name = string("op_913_cast_fp16")]; tensor x_113_perm_0 = const()[name = string("x_113_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67162432))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = var_891_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor var_917 = const()[name = string("op_917"), val = tensor([1, 128, 8, 128])]; tensor var_918_cast_fp16 = reshape(shape = var_917, x = linear_30_cast_fp16)[name = string("op_918_cast_fp16")]; tensor transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_874_promoted_1_to_fp16 = const()[name = string("op_874_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_109_cast_fp16 = transpose(perm = x_109_perm_0, x = var_908_cast_fp16)[name = string("transpose_69")]; tensor var_922_cast_fp16 = pow(x = x_109_cast_fp16, y = var_874_promoted_1_to_fp16)[name = string("op_922_cast_fp16")]; tensor var_924_axes_0 = const()[name = string("op_924_axes_0"), val = tensor([-1])]; bool var_924_keep_dims_0 = const()[name = string("op_924_keep_dims_0"), val = bool(true)]; tensor var_924_cast_fp16 = reduce_mean(axes = var_924_axes_0, keep_dims = var_924_keep_dims_0, x = var_922_cast_fp16)[name = string("op_924_cast_fp16")]; fp16 var_925_to_fp16 = const()[name = string("op_925_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_926_cast_fp16 = add(x = var_924_cast_fp16, y = var_925_to_fp16)[name = string("op_926_cast_fp16")]; fp32 norm_35_epsilon_0 = const()[name = string("norm_35_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_35_cast_fp16 = rsqrt(epsilon = norm_35_epsilon_0, x = var_926_cast_fp16)[name = string("norm_35_cast_fp16")]; tensor var_928_cast_fp16 = mul(x = x_109_cast_fp16, y = norm_35_cast_fp16)[name = string("op_928_cast_fp16")]; tensor layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163008)))]; tensor var_929_cast_fp16 = mul(x = var_928_cast_fp16, y = layers_4_self_attn_q_norm_weight_to_fp16)[name = string("op_929_cast_fp16")]; fp16 var_874_promoted_2_to_fp16 = const()[name = string("op_874_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_113_cast_fp16 = transpose(perm = x_113_perm_0, x = var_913_cast_fp16)[name = string("transpose_68")]; tensor var_933_cast_fp16 = pow(x = x_113_cast_fp16, y = var_874_promoted_2_to_fp16)[name = string("op_933_cast_fp16")]; tensor var_935_axes_0 = const()[name = string("op_935_axes_0"), val = tensor([-1])]; bool var_935_keep_dims_0 = const()[name = string("op_935_keep_dims_0"), val = bool(true)]; tensor var_935_cast_fp16 = reduce_mean(axes = var_935_axes_0, keep_dims = var_935_keep_dims_0, x = var_933_cast_fp16)[name = string("op_935_cast_fp16")]; fp16 var_936_to_fp16 = const()[name = string("op_936_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_937_cast_fp16 = add(x = var_935_cast_fp16, y = var_936_to_fp16)[name = string("op_937_cast_fp16")]; fp32 norm_37_epsilon_0 = const()[name = string("norm_37_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_37_cast_fp16 = rsqrt(epsilon = norm_37_epsilon_0, x = var_937_cast_fp16)[name = string("norm_37_cast_fp16")]; tensor var_939_cast_fp16 = mul(x = x_113_cast_fp16, y = norm_37_cast_fp16)[name = string("op_939_cast_fp16")]; tensor layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163328)))]; tensor var_940_cast_fp16 = mul(x = var_939_cast_fp16, y = layers_4_self_attn_k_norm_weight_to_fp16)[name = string("op_940_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_929_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_929_cast_fp16)[name = string("x2_17_cast_fp16")]; tensor var_961_cast_fp16 = mul(x = x1_17_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_961_cast_fp16")]; tensor var_962_cast_fp16 = mul(x = x2_17_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_962_cast_fp16")]; tensor var_963_cast_fp16 = sub(x = var_961_cast_fp16, y = var_962_cast_fp16)[name = string("op_963_cast_fp16")]; tensor var_964_cast_fp16 = mul(x = x2_17_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_964_cast_fp16")]; tensor var_965_cast_fp16 = mul(x = x1_17_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_965_cast_fp16")]; tensor var_966_cast_fp16 = add(x = var_964_cast_fp16, y = var_965_cast_fp16)[name = string("op_966_cast_fp16")]; bool q_9_interleave_0 = const()[name = string("q_9_interleave_0"), val = bool(false)]; tensor q_9_cast_fp16 = concat(axis = var_875, interleave = q_9_interleave_0, values = (var_963_cast_fp16, var_966_cast_fp16))[name = string("q_9_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_940_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_940_cast_fp16)[name = string("x2_19_cast_fp16")]; tensor var_988_cast_fp16 = mul(x = x1_19_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_988_cast_fp16")]; tensor var_989_cast_fp16 = mul(x = x2_19_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_989_cast_fp16")]; tensor var_990_cast_fp16 = sub(x = var_988_cast_fp16, y = var_989_cast_fp16)[name = string("op_990_cast_fp16")]; tensor var_991_cast_fp16 = mul(x = x2_19_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_991_cast_fp16")]; tensor var_992_cast_fp16 = mul(x = x1_19_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_992_cast_fp16")]; tensor var_993_cast_fp16 = add(x = var_991_cast_fp16, y = var_992_cast_fp16)[name = string("op_993_cast_fp16")]; bool var_995_interleave_0 = const()[name = string("op_995_interleave_0"), val = bool(false)]; tensor var_995_cast_fp16 = concat(axis = var_875, interleave = var_995_interleave_0, values = (var_990_cast_fp16, var_993_cast_fp16))[name = string("op_995_cast_fp16")]; tensor transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_76 = const()[name = string("concat_76"), val = tensor([128, 1024])]; tensor transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = var_995_cast_fp16)[name = string("transpose_67")]; tensor reshape_25_cast_fp16 = reshape(shape = concat_76, x = transpose_17_cast_fp16)[name = string("reshape_25_cast_fp16")]; bool matmul_8_transpose_x_1 = const()[name = string("matmul_8_transpose_x_1"), val = bool(true)]; bool matmul_8_transpose_y_1 = const()[name = string("matmul_8_transpose_y_1"), val = bool(false)]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_1, transpose_y = matmul_8_transpose_y_1, x = var_68_to_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")]; tensor concat_79 = const()[name = string("concat_79"), val = tensor([1024, 1, 8, 128])]; tensor reshape_26_cast_fp16 = reshape(shape = concat_79, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")]; tensor scattered_k_9_perm_0 = const()[name = string("scattered_k_9_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_84 = const()[name = string("concat_84"), val = tensor([128, 1024])]; tensor transpose_60_cast_fp16 = transpose(perm = transpose_60_perm_0, x = var_918_cast_fp16)[name = string("transpose_66")]; tensor reshape_28_cast_fp16 = reshape(shape = concat_84, x = transpose_60_cast_fp16)[name = string("reshape_28_cast_fp16")]; bool matmul_9_transpose_x_1 = const()[name = string("matmul_9_transpose_x_1"), val = bool(true)]; bool matmul_9_transpose_y_1 = const()[name = string("matmul_9_transpose_y_1"), val = bool(false)]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_1, transpose_y = matmul_9_transpose_y_1, x = var_68_to_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")]; tensor concat_87 = const()[name = string("concat_87"), val = tensor([1024, 1, 8, 128])]; tensor reshape_29_cast_fp16 = reshape(shape = concat_87, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")]; tensor scattered_v_9_perm_0 = const()[name = string("scattered_v_9_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_8 = read_state(input = k_cache_4)[name = string("read_state_8")]; tensor k_cache_27_cast_fp16 = mul(x = read_state_8, y = var_224_cast_fp16)[name = string("k_cache_27_cast_fp16")]; write_state(data = k_cache_27_cast_fp16, input = k_cache_4)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = k_cache_4)[name = string("coreml_update_state_72")]; tensor scattered_k_9_cast_fp16 = transpose(perm = scattered_k_9_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_65")]; tensor k_cache_29_cast_fp16 = add(x = coreml_update_state_72, y = scattered_k_9_cast_fp16)[name = string("k_cache_29_cast_fp16")]; write_state(data = k_cache_29_cast_fp16, input = k_cache_4)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = k_cache_4)[name = string("coreml_update_state_73")]; tensor read_state_9 = read_state(input = v_cache_4)[name = string("read_state_9")]; tensor v_cache_27_cast_fp16 = mul(x = read_state_9, y = var_224_cast_fp16)[name = string("v_cache_27_cast_fp16")]; write_state(data = v_cache_27_cast_fp16, input = v_cache_4)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = v_cache_4)[name = string("coreml_update_state_74")]; tensor scattered_v_9_cast_fp16 = transpose(perm = scattered_v_9_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_64")]; tensor v_cache_29_cast_fp16 = add(x = coreml_update_state_74, y = scattered_v_9_cast_fp16)[name = string("v_cache_29_cast_fp16")]; write_state(data = v_cache_29_cast_fp16, input = v_cache_4)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = v_cache_4)[name = string("coreml_update_state_75")]; tensor var_1006_axes_0 = const()[name = string("op_1006_axes_0"), val = tensor([2])]; tensor var_1006_cast_fp16 = expand_dims(axes = var_1006_axes_0, x = coreml_update_state_73)[name = string("op_1006_cast_fp16")]; tensor k_exp_17_reps_0 = const()[name = string("k_exp_17_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_17_cast_fp16 = tile(reps = k_exp_17_reps_0, x = var_1006_cast_fp16)[name = string("k_exp_17_cast_fp16")]; tensor var_1009 = const()[name = string("op_1009"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_19_cast_fp16 = reshape(shape = var_1009, x = k_exp_17_cast_fp16)[name = string("k_exp_19_cast_fp16")]; tensor var_1011_axes_0 = const()[name = string("op_1011_axes_0"), val = tensor([2])]; tensor var_1011_cast_fp16 = expand_dims(axes = var_1011_axes_0, x = coreml_update_state_75)[name = string("op_1011_cast_fp16")]; tensor v_exp_17_reps_0 = const()[name = string("v_exp_17_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_17_cast_fp16 = tile(reps = v_exp_17_reps_0, x = var_1011_cast_fp16)[name = string("v_exp_17_cast_fp16")]; tensor var_1014 = const()[name = string("op_1014"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_19_cast_fp16 = reshape(shape = var_1014, x = v_exp_17_cast_fp16)[name = string("v_exp_19_cast_fp16")]; bool var_1017_transpose_x_1 = const()[name = string("op_1017_transpose_x_1"), val = bool(false)]; bool var_1017_transpose_y_1 = const()[name = string("op_1017_transpose_y_1"), val = bool(true)]; tensor var_1017_cast_fp16 = matmul(transpose_x = var_1017_transpose_x_1, transpose_y = var_1017_transpose_y_1, x = q_9_cast_fp16, y = k_exp_19_cast_fp16)[name = string("op_1017_cast_fp16")]; fp16 var_1018_to_fp16 = const()[name = string("op_1018_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_17_cast_fp16 = mul(x = var_1017_cast_fp16, y = var_1018_to_fp16)[name = string("attn_17_cast_fp16")]; tensor input_41_cast_fp16 = add(x = attn_17_cast_fp16, y = attention_mask_to_fp16)[name = string("input_41_cast_fp16")]; tensor attn_19_cast_fp16 = softmax(axis = var_875, x = input_41_cast_fp16)[name = string("attn_19_cast_fp16")]; bool out_9_transpose_x_0 = const()[name = string("out_9_transpose_x_0"), val = bool(false)]; bool out_9_transpose_y_0 = const()[name = string("out_9_transpose_y_0"), val = bool(false)]; tensor out_9_cast_fp16 = matmul(transpose_x = out_9_transpose_x_0, transpose_y = out_9_transpose_y_0, x = attn_19_cast_fp16, y = v_exp_19_cast_fp16)[name = string("out_9_cast_fp16")]; tensor var_1023_perm_0 = const()[name = string("op_1023_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1024 = const()[name = string("op_1024"), val = tensor([1, 128, -1])]; tensor var_1023_cast_fp16 = transpose(perm = var_1023_perm_0, x = out_9_cast_fp16)[name = string("transpose_63")]; tensor input_43_cast_fp16 = reshape(shape = var_1024, x = var_1023_cast_fp16)[name = string("input_43_cast_fp16")]; tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69260864))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor x_123_cast_fp16 = add(x = x_103_cast_fp16, y = linear_31_cast_fp16)[name = string("x_123_cast_fp16")]; fp16 var_874_promoted_3_to_fp16 = const()[name = string("op_874_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1031_cast_fp16 = pow(x = x_123_cast_fp16, y = var_874_promoted_3_to_fp16)[name = string("op_1031_cast_fp16")]; tensor var_1033_axes_0 = const()[name = string("op_1033_axes_0"), val = tensor([-1])]; bool var_1033_keep_dims_0 = const()[name = string("op_1033_keep_dims_0"), val = bool(true)]; tensor var_1033_cast_fp16 = reduce_mean(axes = var_1033_axes_0, keep_dims = var_1033_keep_dims_0, x = var_1031_cast_fp16)[name = string("op_1033_cast_fp16")]; fp16 var_1034_to_fp16 = const()[name = string("op_1034_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1035_cast_fp16 = add(x = var_1033_cast_fp16, y = var_1034_to_fp16)[name = string("op_1035_cast_fp16")]; fp32 norm_39_epsilon_0 = const()[name = string("norm_39_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_39_cast_fp16 = rsqrt(epsilon = norm_39_epsilon_0, x = var_1035_cast_fp16)[name = string("norm_39_cast_fp16")]; tensor var_1037_cast_fp16 = mul(x = x_123_cast_fp16, y = norm_39_cast_fp16)[name = string("op_1037_cast_fp16")]; tensor layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69261440)))]; tensor var_1038_cast_fp16 = mul(x = var_1037_cast_fp16, y = layers_4_post_attention_layernorm_weight_to_fp16)[name = string("op_1038_cast_fp16")]; tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69263552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409344))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = var_1038_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1048_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1048_cast_fp16")]; tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75555712))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = var_1038_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_49_cast_fp16 = mul(x = var_1048_cast_fp16, y = linear_33_cast_fp16)[name = string("input_49_cast_fp16")]; tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702080))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_34_cast_fp16)[name = string("x_129_cast_fp16")]; int32 var_1069 = const()[name = string("op_1069"), val = int32(-1)]; fp16 var_1068_promoted_to_fp16 = const()[name = string("op_1068_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1078_cast_fp16 = pow(x = x_129_cast_fp16, y = var_1068_promoted_to_fp16)[name = string("op_1078_cast_fp16")]; tensor var_1080_axes_0 = const()[name = string("op_1080_axes_0"), val = tensor([-1])]; bool var_1080_keep_dims_0 = const()[name = string("op_1080_keep_dims_0"), val = bool(true)]; tensor var_1080_cast_fp16 = reduce_mean(axes = var_1080_axes_0, keep_dims = var_1080_keep_dims_0, x = var_1078_cast_fp16)[name = string("op_1080_cast_fp16")]; fp16 var_1081_to_fp16 = const()[name = string("op_1081_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1082_cast_fp16 = add(x = var_1080_cast_fp16, y = var_1081_to_fp16)[name = string("op_1082_cast_fp16")]; fp32 norm_41_epsilon_0 = const()[name = string("norm_41_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_41_cast_fp16 = rsqrt(epsilon = norm_41_epsilon_0, x = var_1082_cast_fp16)[name = string("norm_41_cast_fp16")]; tensor var_1084_cast_fp16 = mul(x = x_129_cast_fp16, y = norm_41_cast_fp16)[name = string("op_1084_cast_fp16")]; tensor layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702656)))]; tensor var_1085_cast_fp16 = mul(x = var_1084_cast_fp16, y = layers_5_input_layernorm_weight_to_fp16)[name = string("op_1085_cast_fp16")]; tensor layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80801984))))[name = string("layers_5_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = var_1085_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor var_1101 = const()[name = string("op_1101"), val = tensor([1, 128, 16, 128])]; tensor var_1102_cast_fp16 = reshape(shape = var_1101, x = linear_35_cast_fp16)[name = string("op_1102_cast_fp16")]; tensor x_135_perm_0 = const()[name = string("x_135_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80802560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851200))))[name = string("layers_5_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = var_1085_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor var_1106 = const()[name = string("op_1106"), val = tensor([1, 128, 8, 128])]; tensor var_1107_cast_fp16 = reshape(shape = var_1106, x = linear_36_cast_fp16)[name = string("op_1107_cast_fp16")]; tensor x_139_perm_0 = const()[name = string("x_139_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900416))))[name = string("layers_5_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = var_1085_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor var_1111 = const()[name = string("op_1111"), val = tensor([1, 128, 8, 128])]; tensor var_1112_cast_fp16 = reshape(shape = var_1111, x = linear_37_cast_fp16)[name = string("op_1112_cast_fp16")]; tensor transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_1068_promoted_1_to_fp16 = const()[name = string("op_1068_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_135_cast_fp16 = transpose(perm = x_135_perm_0, x = var_1102_cast_fp16)[name = string("transpose_62")]; tensor var_1116_cast_fp16 = pow(x = x_135_cast_fp16, y = var_1068_promoted_1_to_fp16)[name = string("op_1116_cast_fp16")]; tensor var_1118_axes_0 = const()[name = string("op_1118_axes_0"), val = tensor([-1])]; bool var_1118_keep_dims_0 = const()[name = string("op_1118_keep_dims_0"), val = bool(true)]; tensor var_1118_cast_fp16 = reduce_mean(axes = var_1118_axes_0, keep_dims = var_1118_keep_dims_0, x = var_1116_cast_fp16)[name = string("op_1118_cast_fp16")]; fp16 var_1119_to_fp16 = const()[name = string("op_1119_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1120_cast_fp16 = add(x = var_1118_cast_fp16, y = var_1119_to_fp16)[name = string("op_1120_cast_fp16")]; fp32 norm_43_epsilon_0 = const()[name = string("norm_43_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_43_cast_fp16 = rsqrt(epsilon = norm_43_epsilon_0, x = var_1120_cast_fp16)[name = string("norm_43_cast_fp16")]; tensor var_1122_cast_fp16 = mul(x = x_135_cast_fp16, y = norm_43_cast_fp16)[name = string("op_1122_cast_fp16")]; tensor layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900992)))]; tensor var_1123_cast_fp16 = mul(x = var_1122_cast_fp16, y = layers_5_self_attn_q_norm_weight_to_fp16)[name = string("op_1123_cast_fp16")]; fp16 var_1068_promoted_2_to_fp16 = const()[name = string("op_1068_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_139_cast_fp16 = transpose(perm = x_139_perm_0, x = var_1107_cast_fp16)[name = string("transpose_61")]; tensor var_1127_cast_fp16 = pow(x = x_139_cast_fp16, y = var_1068_promoted_2_to_fp16)[name = string("op_1127_cast_fp16")]; tensor var_1129_axes_0 = const()[name = string("op_1129_axes_0"), val = tensor([-1])]; bool var_1129_keep_dims_0 = const()[name = string("op_1129_keep_dims_0"), val = bool(true)]; tensor var_1129_cast_fp16 = reduce_mean(axes = var_1129_axes_0, keep_dims = var_1129_keep_dims_0, x = var_1127_cast_fp16)[name = string("op_1129_cast_fp16")]; fp16 var_1130_to_fp16 = const()[name = string("op_1130_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1131_cast_fp16 = add(x = var_1129_cast_fp16, y = var_1130_to_fp16)[name = string("op_1131_cast_fp16")]; fp32 norm_45_epsilon_0 = const()[name = string("norm_45_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_45_cast_fp16 = rsqrt(epsilon = norm_45_epsilon_0, x = var_1131_cast_fp16)[name = string("norm_45_cast_fp16")]; tensor var_1133_cast_fp16 = mul(x = x_139_cast_fp16, y = norm_45_cast_fp16)[name = string("op_1133_cast_fp16")]; tensor layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901312)))]; tensor var_1134_cast_fp16 = mul(x = var_1133_cast_fp16, y = layers_5_self_attn_k_norm_weight_to_fp16)[name = string("op_1134_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1123_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1123_cast_fp16)[name = string("x2_21_cast_fp16")]; tensor var_1155_cast_fp16 = mul(x = x1_21_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1155_cast_fp16")]; tensor var_1156_cast_fp16 = mul(x = x2_21_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1156_cast_fp16")]; tensor var_1157_cast_fp16 = sub(x = var_1155_cast_fp16, y = var_1156_cast_fp16)[name = string("op_1157_cast_fp16")]; tensor var_1158_cast_fp16 = mul(x = x2_21_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1158_cast_fp16")]; tensor var_1159_cast_fp16 = mul(x = x1_21_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1159_cast_fp16")]; tensor var_1160_cast_fp16 = add(x = var_1158_cast_fp16, y = var_1159_cast_fp16)[name = string("op_1160_cast_fp16")]; bool q_11_interleave_0 = const()[name = string("q_11_interleave_0"), val = bool(false)]; tensor q_11_cast_fp16 = concat(axis = var_1069, interleave = q_11_interleave_0, values = (var_1157_cast_fp16, var_1160_cast_fp16))[name = string("q_11_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1134_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1134_cast_fp16)[name = string("x2_23_cast_fp16")]; tensor var_1182_cast_fp16 = mul(x = x1_23_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1182_cast_fp16")]; tensor var_1183_cast_fp16 = mul(x = x2_23_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1183_cast_fp16")]; tensor var_1184_cast_fp16 = sub(x = var_1182_cast_fp16, y = var_1183_cast_fp16)[name = string("op_1184_cast_fp16")]; tensor var_1185_cast_fp16 = mul(x = x2_23_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1185_cast_fp16")]; tensor var_1186_cast_fp16 = mul(x = x1_23_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1186_cast_fp16")]; tensor var_1187_cast_fp16 = add(x = var_1185_cast_fp16, y = var_1186_cast_fp16)[name = string("op_1187_cast_fp16")]; bool var_1189_interleave_0 = const()[name = string("op_1189_interleave_0"), val = bool(false)]; tensor var_1189_cast_fp16 = concat(axis = var_1069, interleave = var_1189_interleave_0, values = (var_1184_cast_fp16, var_1187_cast_fp16))[name = string("op_1189_cast_fp16")]; tensor transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_94 = const()[name = string("concat_94"), val = tensor([128, 1024])]; tensor transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = var_1189_cast_fp16)[name = string("transpose_60")]; tensor reshape_31_cast_fp16 = reshape(shape = concat_94, x = transpose_21_cast_fp16)[name = string("reshape_31_cast_fp16")]; bool matmul_10_transpose_x_1 = const()[name = string("matmul_10_transpose_x_1"), val = bool(true)]; bool matmul_10_transpose_y_1 = const()[name = string("matmul_10_transpose_y_1"), val = bool(false)]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_1, transpose_y = matmul_10_transpose_y_1, x = var_68_to_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")]; tensor concat_97 = const()[name = string("concat_97"), val = tensor([1024, 1, 8, 128])]; tensor reshape_32_cast_fp16 = reshape(shape = concat_97, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")]; tensor scattered_k_11_perm_0 = const()[name = string("scattered_k_11_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_102 = const()[name = string("concat_102"), val = tensor([128, 1024])]; tensor transpose_61_cast_fp16 = transpose(perm = transpose_61_perm_0, x = var_1112_cast_fp16)[name = string("transpose_59")]; tensor reshape_34_cast_fp16 = reshape(shape = concat_102, x = transpose_61_cast_fp16)[name = string("reshape_34_cast_fp16")]; bool matmul_11_transpose_x_1 = const()[name = string("matmul_11_transpose_x_1"), val = bool(true)]; bool matmul_11_transpose_y_1 = const()[name = string("matmul_11_transpose_y_1"), val = bool(false)]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_1, transpose_y = matmul_11_transpose_y_1, x = var_68_to_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")]; tensor concat_105 = const()[name = string("concat_105"), val = tensor([1024, 1, 8, 128])]; tensor reshape_35_cast_fp16 = reshape(shape = concat_105, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")]; tensor scattered_v_11_perm_0 = const()[name = string("scattered_v_11_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_10 = read_state(input = k_cache_5)[name = string("read_state_10")]; tensor k_cache_33_cast_fp16 = mul(x = read_state_10, y = var_224_cast_fp16)[name = string("k_cache_33_cast_fp16")]; write_state(data = k_cache_33_cast_fp16, input = k_cache_5)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = k_cache_5)[name = string("coreml_update_state_76")]; tensor scattered_k_11_cast_fp16 = transpose(perm = scattered_k_11_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_58")]; tensor k_cache_35_cast_fp16 = add(x = coreml_update_state_76, y = scattered_k_11_cast_fp16)[name = string("k_cache_35_cast_fp16")]; write_state(data = k_cache_35_cast_fp16, input = k_cache_5)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = k_cache_5)[name = string("coreml_update_state_77")]; tensor read_state_11 = read_state(input = v_cache_5)[name = string("read_state_11")]; tensor v_cache_33_cast_fp16 = mul(x = read_state_11, y = var_224_cast_fp16)[name = string("v_cache_33_cast_fp16")]; write_state(data = v_cache_33_cast_fp16, input = v_cache_5)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = v_cache_5)[name = string("coreml_update_state_78")]; tensor scattered_v_11_cast_fp16 = transpose(perm = scattered_v_11_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_57")]; tensor v_cache_35_cast_fp16 = add(x = coreml_update_state_78, y = scattered_v_11_cast_fp16)[name = string("v_cache_35_cast_fp16")]; write_state(data = v_cache_35_cast_fp16, input = v_cache_5)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = v_cache_5)[name = string("coreml_update_state_79")]; tensor var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor([2])]; tensor var_1200_cast_fp16 = expand_dims(axes = var_1200_axes_0, x = coreml_update_state_77)[name = string("op_1200_cast_fp16")]; tensor k_exp_21_reps_0 = const()[name = string("k_exp_21_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_21_cast_fp16 = tile(reps = k_exp_21_reps_0, x = var_1200_cast_fp16)[name = string("k_exp_21_cast_fp16")]; tensor var_1203 = const()[name = string("op_1203"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_23_cast_fp16 = reshape(shape = var_1203, x = k_exp_21_cast_fp16)[name = string("k_exp_23_cast_fp16")]; tensor var_1205_axes_0 = const()[name = string("op_1205_axes_0"), val = tensor([2])]; tensor var_1205_cast_fp16 = expand_dims(axes = var_1205_axes_0, x = coreml_update_state_79)[name = string("op_1205_cast_fp16")]; tensor v_exp_21_reps_0 = const()[name = string("v_exp_21_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_21_cast_fp16 = tile(reps = v_exp_21_reps_0, x = var_1205_cast_fp16)[name = string("v_exp_21_cast_fp16")]; tensor var_1208 = const()[name = string("op_1208"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_23_cast_fp16 = reshape(shape = var_1208, x = v_exp_21_cast_fp16)[name = string("v_exp_23_cast_fp16")]; bool var_1211_transpose_x_1 = const()[name = string("op_1211_transpose_x_1"), val = bool(false)]; bool var_1211_transpose_y_1 = const()[name = string("op_1211_transpose_y_1"), val = bool(true)]; tensor var_1211_cast_fp16 = matmul(transpose_x = var_1211_transpose_x_1, transpose_y = var_1211_transpose_y_1, x = q_11_cast_fp16, y = k_exp_23_cast_fp16)[name = string("op_1211_cast_fp16")]; fp16 var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_21_cast_fp16 = mul(x = var_1211_cast_fp16, y = var_1212_to_fp16)[name = string("attn_21_cast_fp16")]; tensor input_51_cast_fp16 = add(x = attn_21_cast_fp16, y = attention_mask_to_fp16)[name = string("input_51_cast_fp16")]; tensor attn_23_cast_fp16 = softmax(axis = var_1069, x = input_51_cast_fp16)[name = string("attn_23_cast_fp16")]; bool out_11_transpose_x_0 = const()[name = string("out_11_transpose_x_0"), val = bool(false)]; bool out_11_transpose_y_0 = const()[name = string("out_11_transpose_y_0"), val = bool(false)]; tensor out_11_cast_fp16 = matmul(transpose_x = out_11_transpose_x_0, transpose_y = out_11_transpose_y_0, x = attn_23_cast_fp16, y = v_exp_23_cast_fp16)[name = string("out_11_cast_fp16")]; tensor var_1217_perm_0 = const()[name = string("op_1217_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1218 = const()[name = string("op_1218"), val = tensor([1, 128, -1])]; tensor var_1217_cast_fp16 = transpose(perm = var_1217_perm_0, x = out_11_cast_fp16)[name = string("transpose_56")]; tensor input_53_cast_fp16 = reshape(shape = var_1218, x = var_1217_cast_fp16)[name = string("input_53_cast_fp16")]; tensor layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84998848))))[name = string("layers_5_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor x_149_cast_fp16 = add(x = x_129_cast_fp16, y = linear_38_cast_fp16)[name = string("x_149_cast_fp16")]; fp16 var_1068_promoted_3_to_fp16 = const()[name = string("op_1068_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1225_cast_fp16 = pow(x = x_149_cast_fp16, y = var_1068_promoted_3_to_fp16)[name = string("op_1225_cast_fp16")]; tensor var_1227_axes_0 = const()[name = string("op_1227_axes_0"), val = tensor([-1])]; bool var_1227_keep_dims_0 = const()[name = string("op_1227_keep_dims_0"), val = bool(true)]; tensor var_1227_cast_fp16 = reduce_mean(axes = var_1227_axes_0, keep_dims = var_1227_keep_dims_0, x = var_1225_cast_fp16)[name = string("op_1227_cast_fp16")]; fp16 var_1228_to_fp16 = const()[name = string("op_1228_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1229_cast_fp16 = add(x = var_1227_cast_fp16, y = var_1228_to_fp16)[name = string("op_1229_cast_fp16")]; fp32 norm_47_epsilon_0 = const()[name = string("norm_47_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_47_cast_fp16 = rsqrt(epsilon = norm_47_epsilon_0, x = var_1229_cast_fp16)[name = string("norm_47_cast_fp16")]; tensor var_1231_cast_fp16 = mul(x = x_149_cast_fp16, y = norm_47_cast_fp16)[name = string("op_1231_cast_fp16")]; tensor layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84999424)))]; tensor var_1232_cast_fp16 = mul(x = var_1231_cast_fp16, y = layers_5_post_attention_layernorm_weight_to_fp16)[name = string("op_1232_cast_fp16")]; tensor layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85001536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147328))))[name = string("layers_5_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = var_1232_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_1242_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1242_cast_fp16")]; tensor layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91293696))))[name = string("layers_5_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_up_proj_weight_to_fp16_palettized, x = var_1232_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_59_cast_fp16 = mul(x = var_1242_cast_fp16, y = linear_40_cast_fp16)[name = string("input_59_cast_fp16")]; tensor layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91294272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440064))))[name = string("layers_5_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor x_155_cast_fp16 = add(x = x_149_cast_fp16, y = linear_41_cast_fp16)[name = string("x_155_cast_fp16")]; int32 var_1263 = const()[name = string("op_1263"), val = int32(-1)]; fp16 var_1262_promoted_to_fp16 = const()[name = string("op_1262_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1272_cast_fp16 = pow(x = x_155_cast_fp16, y = var_1262_promoted_to_fp16)[name = string("op_1272_cast_fp16")]; tensor var_1274_axes_0 = const()[name = string("op_1274_axes_0"), val = tensor([-1])]; bool var_1274_keep_dims_0 = const()[name = string("op_1274_keep_dims_0"), val = bool(true)]; tensor var_1274_cast_fp16 = reduce_mean(axes = var_1274_axes_0, keep_dims = var_1274_keep_dims_0, x = var_1272_cast_fp16)[name = string("op_1274_cast_fp16")]; fp16 var_1275_to_fp16 = const()[name = string("op_1275_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1276_cast_fp16 = add(x = var_1274_cast_fp16, y = var_1275_to_fp16)[name = string("op_1276_cast_fp16")]; fp32 norm_49_epsilon_0 = const()[name = string("norm_49_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_49_cast_fp16 = rsqrt(epsilon = norm_49_epsilon_0, x = var_1276_cast_fp16)[name = string("norm_49_cast_fp16")]; tensor var_1278_cast_fp16 = mul(x = x_155_cast_fp16, y = norm_49_cast_fp16)[name = string("op_1278_cast_fp16")]; tensor layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440640)))]; tensor var_1279_cast_fp16 = mul(x = var_1278_cast_fp16, y = layers_6_input_layernorm_weight_to_fp16)[name = string("op_1279_cast_fp16")]; tensor layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94442752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96539968))))[name = string("layers_6_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = var_1279_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_1295 = const()[name = string("op_1295"), val = tensor([1, 128, 16, 128])]; tensor var_1296_cast_fp16 = reshape(shape = var_1295, x = linear_42_cast_fp16)[name = string("op_1296_cast_fp16")]; tensor x_161_perm_0 = const()[name = string("x_161_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96540544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589184))))[name = string("layers_6_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = var_1279_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor var_1300 = const()[name = string("op_1300"), val = tensor([1, 128, 8, 128])]; tensor var_1301_cast_fp16 = reshape(shape = var_1300, x = linear_43_cast_fp16)[name = string("op_1301_cast_fp16")]; tensor x_165_perm_0 = const()[name = string("x_165_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638400))))[name = string("layers_6_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = var_1279_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor var_1305 = const()[name = string("op_1305"), val = tensor([1, 128, 8, 128])]; tensor var_1306_cast_fp16 = reshape(shape = var_1305, x = linear_44_cast_fp16)[name = string("op_1306_cast_fp16")]; tensor transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_1262_promoted_1_to_fp16 = const()[name = string("op_1262_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_161_cast_fp16 = transpose(perm = x_161_perm_0, x = var_1296_cast_fp16)[name = string("transpose_55")]; tensor var_1310_cast_fp16 = pow(x = x_161_cast_fp16, y = var_1262_promoted_1_to_fp16)[name = string("op_1310_cast_fp16")]; tensor var_1312_axes_0 = const()[name = string("op_1312_axes_0"), val = tensor([-1])]; bool var_1312_keep_dims_0 = const()[name = string("op_1312_keep_dims_0"), val = bool(true)]; tensor var_1312_cast_fp16 = reduce_mean(axes = var_1312_axes_0, keep_dims = var_1312_keep_dims_0, x = var_1310_cast_fp16)[name = string("op_1312_cast_fp16")]; fp16 var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1314_cast_fp16 = add(x = var_1312_cast_fp16, y = var_1313_to_fp16)[name = string("op_1314_cast_fp16")]; fp32 norm_51_epsilon_0 = const()[name = string("norm_51_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_51_cast_fp16 = rsqrt(epsilon = norm_51_epsilon_0, x = var_1314_cast_fp16)[name = string("norm_51_cast_fp16")]; tensor var_1316_cast_fp16 = mul(x = x_161_cast_fp16, y = norm_51_cast_fp16)[name = string("op_1316_cast_fp16")]; tensor layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638976)))]; tensor var_1317_cast_fp16 = mul(x = var_1316_cast_fp16, y = layers_6_self_attn_q_norm_weight_to_fp16)[name = string("op_1317_cast_fp16")]; fp16 var_1262_promoted_2_to_fp16 = const()[name = string("op_1262_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_165_cast_fp16 = transpose(perm = x_165_perm_0, x = var_1301_cast_fp16)[name = string("transpose_54")]; tensor var_1321_cast_fp16 = pow(x = x_165_cast_fp16, y = var_1262_promoted_2_to_fp16)[name = string("op_1321_cast_fp16")]; tensor var_1323_axes_0 = const()[name = string("op_1323_axes_0"), val = tensor([-1])]; bool var_1323_keep_dims_0 = const()[name = string("op_1323_keep_dims_0"), val = bool(true)]; tensor var_1323_cast_fp16 = reduce_mean(axes = var_1323_axes_0, keep_dims = var_1323_keep_dims_0, x = var_1321_cast_fp16)[name = string("op_1323_cast_fp16")]; fp16 var_1324_to_fp16 = const()[name = string("op_1324_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1325_cast_fp16 = add(x = var_1323_cast_fp16, y = var_1324_to_fp16)[name = string("op_1325_cast_fp16")]; fp32 norm_53_epsilon_0 = const()[name = string("norm_53_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_53_cast_fp16 = rsqrt(epsilon = norm_53_epsilon_0, x = var_1325_cast_fp16)[name = string("norm_53_cast_fp16")]; tensor var_1327_cast_fp16 = mul(x = x_165_cast_fp16, y = norm_53_cast_fp16)[name = string("op_1327_cast_fp16")]; tensor layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639296)))]; tensor var_1328_cast_fp16 = mul(x = var_1327_cast_fp16, y = layers_6_self_attn_k_norm_weight_to_fp16)[name = string("op_1328_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1317_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1317_cast_fp16)[name = string("x2_25_cast_fp16")]; tensor var_1349_cast_fp16 = mul(x = x1_25_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1349_cast_fp16")]; tensor var_1350_cast_fp16 = mul(x = x2_25_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1350_cast_fp16")]; tensor var_1351_cast_fp16 = sub(x = var_1349_cast_fp16, y = var_1350_cast_fp16)[name = string("op_1351_cast_fp16")]; tensor var_1352_cast_fp16 = mul(x = x2_25_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1352_cast_fp16")]; tensor var_1353_cast_fp16 = mul(x = x1_25_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1353_cast_fp16")]; tensor var_1354_cast_fp16 = add(x = var_1352_cast_fp16, y = var_1353_cast_fp16)[name = string("op_1354_cast_fp16")]; bool q_13_interleave_0 = const()[name = string("q_13_interleave_0"), val = bool(false)]; tensor q_13_cast_fp16 = concat(axis = var_1263, interleave = q_13_interleave_0, values = (var_1351_cast_fp16, var_1354_cast_fp16))[name = string("q_13_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1328_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1328_cast_fp16)[name = string("x2_27_cast_fp16")]; tensor var_1376_cast_fp16 = mul(x = x1_27_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1376_cast_fp16")]; tensor var_1377_cast_fp16 = mul(x = x2_27_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1377_cast_fp16")]; tensor var_1378_cast_fp16 = sub(x = var_1376_cast_fp16, y = var_1377_cast_fp16)[name = string("op_1378_cast_fp16")]; tensor var_1379_cast_fp16 = mul(x = x2_27_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1379_cast_fp16")]; tensor var_1380_cast_fp16 = mul(x = x1_27_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1380_cast_fp16")]; tensor var_1381_cast_fp16 = add(x = var_1379_cast_fp16, y = var_1380_cast_fp16)[name = string("op_1381_cast_fp16")]; bool var_1383_interleave_0 = const()[name = string("op_1383_interleave_0"), val = bool(false)]; tensor var_1383_cast_fp16 = concat(axis = var_1263, interleave = var_1383_interleave_0, values = (var_1378_cast_fp16, var_1381_cast_fp16))[name = string("op_1383_cast_fp16")]; tensor transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_112 = const()[name = string("concat_112"), val = tensor([128, 1024])]; tensor transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = var_1383_cast_fp16)[name = string("transpose_53")]; tensor reshape_37_cast_fp16 = reshape(shape = concat_112, x = transpose_25_cast_fp16)[name = string("reshape_37_cast_fp16")]; bool matmul_12_transpose_x_1 = const()[name = string("matmul_12_transpose_x_1"), val = bool(true)]; bool matmul_12_transpose_y_1 = const()[name = string("matmul_12_transpose_y_1"), val = bool(false)]; tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_1, transpose_y = matmul_12_transpose_y_1, x = var_68_to_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")]; tensor concat_115 = const()[name = string("concat_115"), val = tensor([1024, 1, 8, 128])]; tensor reshape_38_cast_fp16 = reshape(shape = concat_115, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")]; tensor scattered_k_13_perm_0 = const()[name = string("scattered_k_13_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_120 = const()[name = string("concat_120"), val = tensor([128, 1024])]; tensor transpose_62_cast_fp16 = transpose(perm = transpose_62_perm_0, x = var_1306_cast_fp16)[name = string("transpose_52")]; tensor reshape_40_cast_fp16 = reshape(shape = concat_120, x = transpose_62_cast_fp16)[name = string("reshape_40_cast_fp16")]; bool matmul_13_transpose_x_1 = const()[name = string("matmul_13_transpose_x_1"), val = bool(true)]; bool matmul_13_transpose_y_1 = const()[name = string("matmul_13_transpose_y_1"), val = bool(false)]; tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_1, transpose_y = matmul_13_transpose_y_1, x = var_68_to_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")]; tensor concat_123 = const()[name = string("concat_123"), val = tensor([1024, 1, 8, 128])]; tensor reshape_41_cast_fp16 = reshape(shape = concat_123, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")]; tensor scattered_v_13_perm_0 = const()[name = string("scattered_v_13_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_12 = read_state(input = k_cache_6)[name = string("read_state_12")]; tensor k_cache_39_cast_fp16 = mul(x = read_state_12, y = var_224_cast_fp16)[name = string("k_cache_39_cast_fp16")]; write_state(data = k_cache_39_cast_fp16, input = k_cache_6)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = k_cache_6)[name = string("coreml_update_state_80")]; tensor scattered_k_13_cast_fp16 = transpose(perm = scattered_k_13_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_51")]; tensor k_cache_41_cast_fp16 = add(x = coreml_update_state_80, y = scattered_k_13_cast_fp16)[name = string("k_cache_41_cast_fp16")]; write_state(data = k_cache_41_cast_fp16, input = k_cache_6)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = k_cache_6)[name = string("coreml_update_state_81")]; tensor read_state_13 = read_state(input = v_cache_6)[name = string("read_state_13")]; tensor v_cache_39_cast_fp16 = mul(x = read_state_13, y = var_224_cast_fp16)[name = string("v_cache_39_cast_fp16")]; write_state(data = v_cache_39_cast_fp16, input = v_cache_6)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = v_cache_6)[name = string("coreml_update_state_82")]; tensor scattered_v_13_cast_fp16 = transpose(perm = scattered_v_13_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_50")]; tensor v_cache_41_cast_fp16 = add(x = coreml_update_state_82, y = scattered_v_13_cast_fp16)[name = string("v_cache_41_cast_fp16")]; write_state(data = v_cache_41_cast_fp16, input = v_cache_6)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = v_cache_6)[name = string("coreml_update_state_83")]; tensor var_1394_axes_0 = const()[name = string("op_1394_axes_0"), val = tensor([2])]; tensor var_1394_cast_fp16 = expand_dims(axes = var_1394_axes_0, x = coreml_update_state_81)[name = string("op_1394_cast_fp16")]; tensor k_exp_25_reps_0 = const()[name = string("k_exp_25_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_25_cast_fp16 = tile(reps = k_exp_25_reps_0, x = var_1394_cast_fp16)[name = string("k_exp_25_cast_fp16")]; tensor var_1397 = const()[name = string("op_1397"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_27_cast_fp16 = reshape(shape = var_1397, x = k_exp_25_cast_fp16)[name = string("k_exp_27_cast_fp16")]; tensor var_1399_axes_0 = const()[name = string("op_1399_axes_0"), val = tensor([2])]; tensor var_1399_cast_fp16 = expand_dims(axes = var_1399_axes_0, x = coreml_update_state_83)[name = string("op_1399_cast_fp16")]; tensor v_exp_25_reps_0 = const()[name = string("v_exp_25_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_25_cast_fp16 = tile(reps = v_exp_25_reps_0, x = var_1399_cast_fp16)[name = string("v_exp_25_cast_fp16")]; tensor var_1402 = const()[name = string("op_1402"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_27_cast_fp16 = reshape(shape = var_1402, x = v_exp_25_cast_fp16)[name = string("v_exp_27_cast_fp16")]; bool var_1405_transpose_x_1 = const()[name = string("op_1405_transpose_x_1"), val = bool(false)]; bool var_1405_transpose_y_1 = const()[name = string("op_1405_transpose_y_1"), val = bool(true)]; tensor var_1405_cast_fp16 = matmul(transpose_x = var_1405_transpose_x_1, transpose_y = var_1405_transpose_y_1, x = q_13_cast_fp16, y = k_exp_27_cast_fp16)[name = string("op_1405_cast_fp16")]; fp16 var_1406_to_fp16 = const()[name = string("op_1406_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_25_cast_fp16 = mul(x = var_1405_cast_fp16, y = var_1406_to_fp16)[name = string("attn_25_cast_fp16")]; tensor input_61_cast_fp16 = add(x = attn_25_cast_fp16, y = attention_mask_to_fp16)[name = string("input_61_cast_fp16")]; tensor attn_27_cast_fp16 = softmax(axis = var_1263, x = input_61_cast_fp16)[name = string("attn_27_cast_fp16")]; bool out_13_transpose_x_0 = const()[name = string("out_13_transpose_x_0"), val = bool(false)]; bool out_13_transpose_y_0 = const()[name = string("out_13_transpose_y_0"), val = bool(false)]; tensor out_13_cast_fp16 = matmul(transpose_x = out_13_transpose_x_0, transpose_y = out_13_transpose_y_0, x = attn_27_cast_fp16, y = v_exp_27_cast_fp16)[name = string("out_13_cast_fp16")]; tensor var_1411_perm_0 = const()[name = string("op_1411_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1412 = const()[name = string("op_1412"), val = tensor([1, 128, -1])]; tensor var_1411_cast_fp16 = transpose(perm = var_1411_perm_0, x = out_13_cast_fp16)[name = string("transpose_49")]; tensor input_63_cast_fp16 = reshape(shape = var_1412, x = var_1411_cast_fp16)[name = string("input_63_cast_fp16")]; tensor layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100736832))))[name = string("layers_6_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor x_175_cast_fp16 = add(x = x_155_cast_fp16, y = linear_45_cast_fp16)[name = string("x_175_cast_fp16")]; fp16 var_1262_promoted_3_to_fp16 = const()[name = string("op_1262_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1419_cast_fp16 = pow(x = x_175_cast_fp16, y = var_1262_promoted_3_to_fp16)[name = string("op_1419_cast_fp16")]; tensor var_1421_axes_0 = const()[name = string("op_1421_axes_0"), val = tensor([-1])]; bool var_1421_keep_dims_0 = const()[name = string("op_1421_keep_dims_0"), val = bool(true)]; tensor var_1421_cast_fp16 = reduce_mean(axes = var_1421_axes_0, keep_dims = var_1421_keep_dims_0, x = var_1419_cast_fp16)[name = string("op_1421_cast_fp16")]; fp16 var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1423_cast_fp16 = add(x = var_1421_cast_fp16, y = var_1422_to_fp16)[name = string("op_1423_cast_fp16")]; fp32 norm_55_epsilon_0 = const()[name = string("norm_55_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_55_cast_fp16 = rsqrt(epsilon = norm_55_epsilon_0, x = var_1423_cast_fp16)[name = string("norm_55_cast_fp16")]; tensor var_1425_cast_fp16 = mul(x = x_175_cast_fp16, y = norm_55_cast_fp16)[name = string("op_1425_cast_fp16")]; tensor layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100737408)))]; tensor var_1426_cast_fp16 = mul(x = var_1425_cast_fp16, y = layers_6_post_attention_layernorm_weight_to_fp16)[name = string("op_1426_cast_fp16")]; tensor layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100739520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885312))))[name = string("layers_6_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = var_1426_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_1436_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1436_cast_fp16")]; tensor layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107031680))))[name = string("layers_6_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_up_proj_weight_to_fp16_palettized, x = var_1426_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_69_cast_fp16 = mul(x = var_1436_cast_fp16, y = linear_47_cast_fp16)[name = string("input_69_cast_fp16")]; tensor layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107032256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178048))))[name = string("layers_6_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_48_cast_fp16)[name = string("x_181_cast_fp16")]; int32 var_1457 = const()[name = string("op_1457"), val = int32(-1)]; fp16 var_1456_promoted_to_fp16 = const()[name = string("op_1456_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1466_cast_fp16 = pow(x = x_181_cast_fp16, y = var_1456_promoted_to_fp16)[name = string("op_1466_cast_fp16")]; tensor var_1468_axes_0 = const()[name = string("op_1468_axes_0"), val = tensor([-1])]; bool var_1468_keep_dims_0 = const()[name = string("op_1468_keep_dims_0"), val = bool(true)]; tensor var_1468_cast_fp16 = reduce_mean(axes = var_1468_axes_0, keep_dims = var_1468_keep_dims_0, x = var_1466_cast_fp16)[name = string("op_1468_cast_fp16")]; fp16 var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1470_cast_fp16 = add(x = var_1468_cast_fp16, y = var_1469_to_fp16)[name = string("op_1470_cast_fp16")]; fp32 norm_57_epsilon_0 = const()[name = string("norm_57_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_57_cast_fp16 = rsqrt(epsilon = norm_57_epsilon_0, x = var_1470_cast_fp16)[name = string("norm_57_cast_fp16")]; tensor var_1472_cast_fp16 = mul(x = x_181_cast_fp16, y = norm_57_cast_fp16)[name = string("op_1472_cast_fp16")]; tensor layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178624)))]; tensor var_1473_cast_fp16 = mul(x = var_1472_cast_fp16, y = layers_7_input_layernorm_weight_to_fp16)[name = string("op_1473_cast_fp16")]; tensor layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112277952))))[name = string("layers_7_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = var_1473_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor var_1489 = const()[name = string("op_1489"), val = tensor([1, 128, 16, 128])]; tensor var_1490_cast_fp16 = reshape(shape = var_1489, x = linear_49_cast_fp16)[name = string("op_1490_cast_fp16")]; tensor x_187_perm_0 = const()[name = string("x_187_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112278528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327168))))[name = string("layers_7_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = var_1473_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor var_1494 = const()[name = string("op_1494"), val = tensor([1, 128, 8, 128])]; tensor var_1495_cast_fp16 = reshape(shape = var_1494, x = linear_50_cast_fp16)[name = string("op_1495_cast_fp16")]; tensor x_191_perm_0 = const()[name = string("x_191_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376384))))[name = string("layers_7_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = var_1473_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor var_1499 = const()[name = string("op_1499"), val = tensor([1, 128, 8, 128])]; tensor var_1500_cast_fp16 = reshape(shape = var_1499, x = linear_51_cast_fp16)[name = string("op_1500_cast_fp16")]; tensor transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_1456_promoted_1_to_fp16 = const()[name = string("op_1456_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_187_cast_fp16 = transpose(perm = x_187_perm_0, x = var_1490_cast_fp16)[name = string("transpose_48")]; tensor var_1504_cast_fp16 = pow(x = x_187_cast_fp16, y = var_1456_promoted_1_to_fp16)[name = string("op_1504_cast_fp16")]; tensor var_1506_axes_0 = const()[name = string("op_1506_axes_0"), val = tensor([-1])]; bool var_1506_keep_dims_0 = const()[name = string("op_1506_keep_dims_0"), val = bool(true)]; tensor var_1506_cast_fp16 = reduce_mean(axes = var_1506_axes_0, keep_dims = var_1506_keep_dims_0, x = var_1504_cast_fp16)[name = string("op_1506_cast_fp16")]; fp16 var_1507_to_fp16 = const()[name = string("op_1507_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1508_cast_fp16 = add(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = string("op_1508_cast_fp16")]; fp32 norm_59_epsilon_0 = const()[name = string("norm_59_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_59_cast_fp16 = rsqrt(epsilon = norm_59_epsilon_0, x = var_1508_cast_fp16)[name = string("norm_59_cast_fp16")]; tensor var_1510_cast_fp16 = mul(x = x_187_cast_fp16, y = norm_59_cast_fp16)[name = string("op_1510_cast_fp16")]; tensor layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376960)))]; tensor var_1511_cast_fp16 = mul(x = var_1510_cast_fp16, y = layers_7_self_attn_q_norm_weight_to_fp16)[name = string("op_1511_cast_fp16")]; fp16 var_1456_promoted_2_to_fp16 = const()[name = string("op_1456_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_191_cast_fp16 = transpose(perm = x_191_perm_0, x = var_1495_cast_fp16)[name = string("transpose_47")]; tensor var_1515_cast_fp16 = pow(x = x_191_cast_fp16, y = var_1456_promoted_2_to_fp16)[name = string("op_1515_cast_fp16")]; tensor var_1517_axes_0 = const()[name = string("op_1517_axes_0"), val = tensor([-1])]; bool var_1517_keep_dims_0 = const()[name = string("op_1517_keep_dims_0"), val = bool(true)]; tensor var_1517_cast_fp16 = reduce_mean(axes = var_1517_axes_0, keep_dims = var_1517_keep_dims_0, x = var_1515_cast_fp16)[name = string("op_1517_cast_fp16")]; fp16 var_1518_to_fp16 = const()[name = string("op_1518_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1519_cast_fp16 = add(x = var_1517_cast_fp16, y = var_1518_to_fp16)[name = string("op_1519_cast_fp16")]; fp32 norm_61_epsilon_0 = const()[name = string("norm_61_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_61_cast_fp16 = rsqrt(epsilon = norm_61_epsilon_0, x = var_1519_cast_fp16)[name = string("norm_61_cast_fp16")]; tensor var_1521_cast_fp16 = mul(x = x_191_cast_fp16, y = norm_61_cast_fp16)[name = string("op_1521_cast_fp16")]; tensor layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377280)))]; tensor var_1522_cast_fp16 = mul(x = var_1521_cast_fp16, y = layers_7_self_attn_k_norm_weight_to_fp16)[name = string("op_1522_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1511_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1511_cast_fp16)[name = string("x2_29_cast_fp16")]; tensor var_1543_cast_fp16 = mul(x = x1_29_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1543_cast_fp16")]; tensor var_1544_cast_fp16 = mul(x = x2_29_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1544_cast_fp16")]; tensor var_1545_cast_fp16 = sub(x = var_1543_cast_fp16, y = var_1544_cast_fp16)[name = string("op_1545_cast_fp16")]; tensor var_1546_cast_fp16 = mul(x = x2_29_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1546_cast_fp16")]; tensor var_1547_cast_fp16 = mul(x = x1_29_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1547_cast_fp16")]; tensor var_1548_cast_fp16 = add(x = var_1546_cast_fp16, y = var_1547_cast_fp16)[name = string("op_1548_cast_fp16")]; bool q_15_interleave_0 = const()[name = string("q_15_interleave_0"), val = bool(false)]; tensor q_15_cast_fp16 = concat(axis = var_1457, interleave = q_15_interleave_0, values = (var_1545_cast_fp16, var_1548_cast_fp16))[name = string("q_15_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1522_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1522_cast_fp16)[name = string("x2_31_cast_fp16")]; tensor var_1570_cast_fp16 = mul(x = x1_31_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1570_cast_fp16")]; tensor var_1571_cast_fp16 = mul(x = x2_31_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1571_cast_fp16")]; tensor var_1572_cast_fp16 = sub(x = var_1570_cast_fp16, y = var_1571_cast_fp16)[name = string("op_1572_cast_fp16")]; tensor var_1573_cast_fp16 = mul(x = x2_31_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1573_cast_fp16")]; tensor var_1574_cast_fp16 = mul(x = x1_31_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1574_cast_fp16")]; tensor var_1575_cast_fp16 = add(x = var_1573_cast_fp16, y = var_1574_cast_fp16)[name = string("op_1575_cast_fp16")]; bool var_1577_interleave_0 = const()[name = string("op_1577_interleave_0"), val = bool(false)]; tensor var_1577_cast_fp16 = concat(axis = var_1457, interleave = var_1577_interleave_0, values = (var_1572_cast_fp16, var_1575_cast_fp16))[name = string("op_1577_cast_fp16")]; tensor transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_130 = const()[name = string("concat_130"), val = tensor([128, 1024])]; tensor transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = var_1577_cast_fp16)[name = string("transpose_46")]; tensor reshape_43_cast_fp16 = reshape(shape = concat_130, x = transpose_29_cast_fp16)[name = string("reshape_43_cast_fp16")]; bool matmul_14_transpose_x_1 = const()[name = string("matmul_14_transpose_x_1"), val = bool(true)]; bool matmul_14_transpose_y_1 = const()[name = string("matmul_14_transpose_y_1"), val = bool(false)]; tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_1, transpose_y = matmul_14_transpose_y_1, x = var_68_to_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")]; tensor concat_133 = const()[name = string("concat_133"), val = tensor([1024, 1, 8, 128])]; tensor reshape_44_cast_fp16 = reshape(shape = concat_133, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")]; tensor scattered_k_15_perm_0 = const()[name = string("scattered_k_15_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_138 = const()[name = string("concat_138"), val = tensor([128, 1024])]; tensor transpose_63_cast_fp16 = transpose(perm = transpose_63_perm_0, x = var_1500_cast_fp16)[name = string("transpose_45")]; tensor reshape_46_cast_fp16 = reshape(shape = concat_138, x = transpose_63_cast_fp16)[name = string("reshape_46_cast_fp16")]; bool matmul_15_transpose_x_1 = const()[name = string("matmul_15_transpose_x_1"), val = bool(true)]; bool matmul_15_transpose_y_1 = const()[name = string("matmul_15_transpose_y_1"), val = bool(false)]; tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_1, transpose_y = matmul_15_transpose_y_1, x = var_68_to_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")]; tensor concat_141 = const()[name = string("concat_141"), val = tensor([1024, 1, 8, 128])]; tensor reshape_47_cast_fp16 = reshape(shape = concat_141, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")]; tensor scattered_v_15_perm_0 = const()[name = string("scattered_v_15_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_14 = read_state(input = k_cache_7)[name = string("read_state_14")]; tensor k_cache_45_cast_fp16 = mul(x = read_state_14, y = var_224_cast_fp16)[name = string("k_cache_45_cast_fp16")]; write_state(data = k_cache_45_cast_fp16, input = k_cache_7)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = k_cache_7)[name = string("coreml_update_state_84")]; tensor scattered_k_15_cast_fp16 = transpose(perm = scattered_k_15_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_44")]; tensor k_cache_47_cast_fp16 = add(x = coreml_update_state_84, y = scattered_k_15_cast_fp16)[name = string("k_cache_47_cast_fp16")]; write_state(data = k_cache_47_cast_fp16, input = k_cache_7)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = k_cache_7)[name = string("coreml_update_state_85")]; tensor read_state_15 = read_state(input = v_cache_7)[name = string("read_state_15")]; tensor v_cache_45_cast_fp16 = mul(x = read_state_15, y = var_224_cast_fp16)[name = string("v_cache_45_cast_fp16")]; write_state(data = v_cache_45_cast_fp16, input = v_cache_7)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = v_cache_7)[name = string("coreml_update_state_86")]; tensor scattered_v_15_cast_fp16 = transpose(perm = scattered_v_15_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_43")]; tensor v_cache_47_cast_fp16 = add(x = coreml_update_state_86, y = scattered_v_15_cast_fp16)[name = string("v_cache_47_cast_fp16")]; write_state(data = v_cache_47_cast_fp16, input = v_cache_7)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = v_cache_7)[name = string("coreml_update_state_87")]; tensor var_1588_axes_0 = const()[name = string("op_1588_axes_0"), val = tensor([2])]; tensor var_1588_cast_fp16 = expand_dims(axes = var_1588_axes_0, x = coreml_update_state_85)[name = string("op_1588_cast_fp16")]; tensor k_exp_29_reps_0 = const()[name = string("k_exp_29_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_29_cast_fp16 = tile(reps = k_exp_29_reps_0, x = var_1588_cast_fp16)[name = string("k_exp_29_cast_fp16")]; tensor var_1591 = const()[name = string("op_1591"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_31_cast_fp16 = reshape(shape = var_1591, x = k_exp_29_cast_fp16)[name = string("k_exp_31_cast_fp16")]; tensor var_1593_axes_0 = const()[name = string("op_1593_axes_0"), val = tensor([2])]; tensor var_1593_cast_fp16 = expand_dims(axes = var_1593_axes_0, x = coreml_update_state_87)[name = string("op_1593_cast_fp16")]; tensor v_exp_29_reps_0 = const()[name = string("v_exp_29_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_29_cast_fp16 = tile(reps = v_exp_29_reps_0, x = var_1593_cast_fp16)[name = string("v_exp_29_cast_fp16")]; tensor var_1596 = const()[name = string("op_1596"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_31_cast_fp16 = reshape(shape = var_1596, x = v_exp_29_cast_fp16)[name = string("v_exp_31_cast_fp16")]; bool var_1599_transpose_x_1 = const()[name = string("op_1599_transpose_x_1"), val = bool(false)]; bool var_1599_transpose_y_1 = const()[name = string("op_1599_transpose_y_1"), val = bool(true)]; tensor var_1599_cast_fp16 = matmul(transpose_x = var_1599_transpose_x_1, transpose_y = var_1599_transpose_y_1, x = q_15_cast_fp16, y = k_exp_31_cast_fp16)[name = string("op_1599_cast_fp16")]; fp16 var_1600_to_fp16 = const()[name = string("op_1600_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_29_cast_fp16 = mul(x = var_1599_cast_fp16, y = var_1600_to_fp16)[name = string("attn_29_cast_fp16")]; tensor input_71_cast_fp16 = add(x = attn_29_cast_fp16, y = attention_mask_to_fp16)[name = string("input_71_cast_fp16")]; tensor attn_31_cast_fp16 = softmax(axis = var_1457, x = input_71_cast_fp16)[name = string("attn_31_cast_fp16")]; bool out_15_transpose_x_0 = const()[name = string("out_15_transpose_x_0"), val = bool(false)]; bool out_15_transpose_y_0 = const()[name = string("out_15_transpose_y_0"), val = bool(false)]; tensor out_15_cast_fp16 = matmul(transpose_x = out_15_transpose_x_0, transpose_y = out_15_transpose_y_0, x = attn_31_cast_fp16, y = v_exp_31_cast_fp16)[name = string("out_15_cast_fp16")]; tensor var_1605_perm_0 = const()[name = string("op_1605_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1606 = const()[name = string("op_1606"), val = tensor([1, 128, -1])]; tensor var_1605_cast_fp16 = transpose(perm = var_1605_perm_0, x = out_15_cast_fp16)[name = string("transpose_42")]; tensor input_73_cast_fp16 = reshape(shape = var_1606, x = var_1605_cast_fp16)[name = string("input_73_cast_fp16")]; tensor layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116474816))))[name = string("layers_7_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor x_201_cast_fp16 = add(x = x_181_cast_fp16, y = linear_52_cast_fp16)[name = string("x_201_cast_fp16")]; fp16 var_1456_promoted_3_to_fp16 = const()[name = string("op_1456_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1613_cast_fp16 = pow(x = x_201_cast_fp16, y = var_1456_promoted_3_to_fp16)[name = string("op_1613_cast_fp16")]; tensor var_1615_axes_0 = const()[name = string("op_1615_axes_0"), val = tensor([-1])]; bool var_1615_keep_dims_0 = const()[name = string("op_1615_keep_dims_0"), val = bool(true)]; tensor var_1615_cast_fp16 = reduce_mean(axes = var_1615_axes_0, keep_dims = var_1615_keep_dims_0, x = var_1613_cast_fp16)[name = string("op_1615_cast_fp16")]; fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1617_cast_fp16 = add(x = var_1615_cast_fp16, y = var_1616_to_fp16)[name = string("op_1617_cast_fp16")]; fp32 norm_63_epsilon_0 = const()[name = string("norm_63_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_63_cast_fp16 = rsqrt(epsilon = norm_63_epsilon_0, x = var_1617_cast_fp16)[name = string("norm_63_cast_fp16")]; tensor var_1619_cast_fp16 = mul(x = x_201_cast_fp16, y = norm_63_cast_fp16)[name = string("op_1619_cast_fp16")]; tensor layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116475392)))]; tensor var_1620_cast_fp16 = mul(x = var_1619_cast_fp16, y = layers_7_post_attention_layernorm_weight_to_fp16)[name = string("op_1620_cast_fp16")]; tensor layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116477504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623296))))[name = string("layers_7_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = var_1620_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_1630_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1630_cast_fp16")]; tensor layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122769664))))[name = string("layers_7_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_up_proj_weight_to_fp16_palettized, x = var_1620_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_79_cast_fp16 = mul(x = var_1630_cast_fp16, y = linear_54_cast_fp16)[name = string("input_79_cast_fp16")]; tensor layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122770240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916032))))[name = string("layers_7_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_55_cast_fp16)[name = string("x_207_cast_fp16")]; int32 var_1651 = const()[name = string("op_1651"), val = int32(-1)]; fp16 var_1650_promoted_to_fp16 = const()[name = string("op_1650_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1660_cast_fp16 = pow(x = x_207_cast_fp16, y = var_1650_promoted_to_fp16)[name = string("op_1660_cast_fp16")]; tensor var_1662_axes_0 = const()[name = string("op_1662_axes_0"), val = tensor([-1])]; bool var_1662_keep_dims_0 = const()[name = string("op_1662_keep_dims_0"), val = bool(true)]; tensor var_1662_cast_fp16 = reduce_mean(axes = var_1662_axes_0, keep_dims = var_1662_keep_dims_0, x = var_1660_cast_fp16)[name = string("op_1662_cast_fp16")]; fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1664_cast_fp16 = add(x = var_1662_cast_fp16, y = var_1663_to_fp16)[name = string("op_1664_cast_fp16")]; fp32 norm_65_epsilon_0 = const()[name = string("norm_65_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_65_cast_fp16 = rsqrt(epsilon = norm_65_epsilon_0, x = var_1664_cast_fp16)[name = string("norm_65_cast_fp16")]; tensor var_1666_cast_fp16 = mul(x = x_207_cast_fp16, y = norm_65_cast_fp16)[name = string("op_1666_cast_fp16")]; tensor layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916608)))]; tensor var_1667_cast_fp16 = mul(x = var_1666_cast_fp16, y = layers_8_input_layernorm_weight_to_fp16)[name = string("op_1667_cast_fp16")]; tensor layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125918720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128015936))))[name = string("layers_8_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = var_1667_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor var_1683 = const()[name = string("op_1683"), val = tensor([1, 128, 16, 128])]; tensor var_1684_cast_fp16 = reshape(shape = var_1683, x = linear_56_cast_fp16)[name = string("op_1684_cast_fp16")]; tensor x_213_perm_0 = const()[name = string("x_213_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128016512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065152))))[name = string("layers_8_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = var_1667_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor var_1688 = const()[name = string("op_1688"), val = tensor([1, 128, 8, 128])]; tensor var_1689_cast_fp16 = reshape(shape = var_1688, x = linear_57_cast_fp16)[name = string("op_1689_cast_fp16")]; tensor x_217_perm_0 = const()[name = string("x_217_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114368))))[name = string("layers_8_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = var_1667_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor var_1693 = const()[name = string("op_1693"), val = tensor([1, 128, 8, 128])]; tensor var_1694_cast_fp16 = reshape(shape = var_1693, x = linear_58_cast_fp16)[name = string("op_1694_cast_fp16")]; tensor transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_1650_promoted_1_to_fp16 = const()[name = string("op_1650_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_213_cast_fp16 = transpose(perm = x_213_perm_0, x = var_1684_cast_fp16)[name = string("transpose_41")]; tensor var_1698_cast_fp16 = pow(x = x_213_cast_fp16, y = var_1650_promoted_1_to_fp16)[name = string("op_1698_cast_fp16")]; tensor var_1700_axes_0 = const()[name = string("op_1700_axes_0"), val = tensor([-1])]; bool var_1700_keep_dims_0 = const()[name = string("op_1700_keep_dims_0"), val = bool(true)]; tensor var_1700_cast_fp16 = reduce_mean(axes = var_1700_axes_0, keep_dims = var_1700_keep_dims_0, x = var_1698_cast_fp16)[name = string("op_1700_cast_fp16")]; fp16 var_1701_to_fp16 = const()[name = string("op_1701_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1702_cast_fp16 = add(x = var_1700_cast_fp16, y = var_1701_to_fp16)[name = string("op_1702_cast_fp16")]; fp32 norm_67_epsilon_0 = const()[name = string("norm_67_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_67_cast_fp16 = rsqrt(epsilon = norm_67_epsilon_0, x = var_1702_cast_fp16)[name = string("norm_67_cast_fp16")]; tensor var_1704_cast_fp16 = mul(x = x_213_cast_fp16, y = norm_67_cast_fp16)[name = string("op_1704_cast_fp16")]; tensor layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114944)))]; tensor var_1705_cast_fp16 = mul(x = var_1704_cast_fp16, y = layers_8_self_attn_q_norm_weight_to_fp16)[name = string("op_1705_cast_fp16")]; fp16 var_1650_promoted_2_to_fp16 = const()[name = string("op_1650_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_217_cast_fp16 = transpose(perm = x_217_perm_0, x = var_1689_cast_fp16)[name = string("transpose_40")]; tensor var_1709_cast_fp16 = pow(x = x_217_cast_fp16, y = var_1650_promoted_2_to_fp16)[name = string("op_1709_cast_fp16")]; tensor var_1711_axes_0 = const()[name = string("op_1711_axes_0"), val = tensor([-1])]; bool var_1711_keep_dims_0 = const()[name = string("op_1711_keep_dims_0"), val = bool(true)]; tensor var_1711_cast_fp16 = reduce_mean(axes = var_1711_axes_0, keep_dims = var_1711_keep_dims_0, x = var_1709_cast_fp16)[name = string("op_1711_cast_fp16")]; fp16 var_1712_to_fp16 = const()[name = string("op_1712_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1713_cast_fp16 = add(x = var_1711_cast_fp16, y = var_1712_to_fp16)[name = string("op_1713_cast_fp16")]; fp32 norm_69_epsilon_0 = const()[name = string("norm_69_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_69_cast_fp16 = rsqrt(epsilon = norm_69_epsilon_0, x = var_1713_cast_fp16)[name = string("norm_69_cast_fp16")]; tensor var_1715_cast_fp16 = mul(x = x_217_cast_fp16, y = norm_69_cast_fp16)[name = string("op_1715_cast_fp16")]; tensor layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115264)))]; tensor var_1716_cast_fp16 = mul(x = var_1715_cast_fp16, y = layers_8_self_attn_k_norm_weight_to_fp16)[name = string("op_1716_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1705_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1705_cast_fp16)[name = string("x2_33_cast_fp16")]; tensor var_1737_cast_fp16 = mul(x = x1_33_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1737_cast_fp16")]; tensor var_1738_cast_fp16 = mul(x = x2_33_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1738_cast_fp16")]; tensor var_1739_cast_fp16 = sub(x = var_1737_cast_fp16, y = var_1738_cast_fp16)[name = string("op_1739_cast_fp16")]; tensor var_1740_cast_fp16 = mul(x = x2_33_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1740_cast_fp16")]; tensor var_1741_cast_fp16 = mul(x = x1_33_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1741_cast_fp16")]; tensor var_1742_cast_fp16 = add(x = var_1740_cast_fp16, y = var_1741_cast_fp16)[name = string("op_1742_cast_fp16")]; bool q_17_interleave_0 = const()[name = string("q_17_interleave_0"), val = bool(false)]; tensor q_17_cast_fp16 = concat(axis = var_1651, interleave = q_17_interleave_0, values = (var_1739_cast_fp16, var_1742_cast_fp16))[name = string("q_17_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_1716_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_1716_cast_fp16)[name = string("x2_35_cast_fp16")]; tensor var_1764_cast_fp16 = mul(x = x1_35_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1764_cast_fp16")]; tensor var_1765_cast_fp16 = mul(x = x2_35_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1765_cast_fp16")]; tensor var_1766_cast_fp16 = sub(x = var_1764_cast_fp16, y = var_1765_cast_fp16)[name = string("op_1766_cast_fp16")]; tensor var_1767_cast_fp16 = mul(x = x2_35_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor var_1768_cast_fp16 = mul(x = x1_35_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1768_cast_fp16")]; tensor var_1769_cast_fp16 = add(x = var_1767_cast_fp16, y = var_1768_cast_fp16)[name = string("op_1769_cast_fp16")]; bool var_1771_interleave_0 = const()[name = string("op_1771_interleave_0"), val = bool(false)]; tensor var_1771_cast_fp16 = concat(axis = var_1651, interleave = var_1771_interleave_0, values = (var_1766_cast_fp16, var_1769_cast_fp16))[name = string("op_1771_cast_fp16")]; tensor transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_148 = const()[name = string("concat_148"), val = tensor([128, 1024])]; tensor transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = var_1771_cast_fp16)[name = string("transpose_39")]; tensor reshape_49_cast_fp16 = reshape(shape = concat_148, x = transpose_33_cast_fp16)[name = string("reshape_49_cast_fp16")]; bool matmul_16_transpose_x_1 = const()[name = string("matmul_16_transpose_x_1"), val = bool(true)]; bool matmul_16_transpose_y_1 = const()[name = string("matmul_16_transpose_y_1"), val = bool(false)]; tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_1, transpose_y = matmul_16_transpose_y_1, x = var_68_to_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")]; tensor concat_151 = const()[name = string("concat_151"), val = tensor([1024, 1, 8, 128])]; tensor reshape_50_cast_fp16 = reshape(shape = concat_151, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")]; tensor scattered_k_17_perm_0 = const()[name = string("scattered_k_17_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_156 = const()[name = string("concat_156"), val = tensor([128, 1024])]; tensor transpose_64_cast_fp16 = transpose(perm = transpose_64_perm_0, x = var_1694_cast_fp16)[name = string("transpose_38")]; tensor reshape_52_cast_fp16 = reshape(shape = concat_156, x = transpose_64_cast_fp16)[name = string("reshape_52_cast_fp16")]; bool matmul_17_transpose_x_1 = const()[name = string("matmul_17_transpose_x_1"), val = bool(true)]; bool matmul_17_transpose_y_1 = const()[name = string("matmul_17_transpose_y_1"), val = bool(false)]; tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_1, transpose_y = matmul_17_transpose_y_1, x = var_68_to_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")]; tensor concat_159 = const()[name = string("concat_159"), val = tensor([1024, 1, 8, 128])]; tensor reshape_53_cast_fp16 = reshape(shape = concat_159, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")]; tensor scattered_v_17_perm_0 = const()[name = string("scattered_v_17_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_16 = read_state(input = k_cache_8)[name = string("read_state_16")]; tensor k_cache_51_cast_fp16 = mul(x = read_state_16, y = var_224_cast_fp16)[name = string("k_cache_51_cast_fp16")]; write_state(data = k_cache_51_cast_fp16, input = k_cache_8)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = k_cache_8)[name = string("coreml_update_state_88")]; tensor scattered_k_17_cast_fp16 = transpose(perm = scattered_k_17_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_37")]; tensor k_cache_53_cast_fp16 = add(x = coreml_update_state_88, y = scattered_k_17_cast_fp16)[name = string("k_cache_53_cast_fp16")]; write_state(data = k_cache_53_cast_fp16, input = k_cache_8)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = k_cache_8)[name = string("coreml_update_state_89")]; tensor read_state_17 = read_state(input = v_cache_8)[name = string("read_state_17")]; tensor v_cache_51_cast_fp16 = mul(x = read_state_17, y = var_224_cast_fp16)[name = string("v_cache_51_cast_fp16")]; write_state(data = v_cache_51_cast_fp16, input = v_cache_8)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = v_cache_8)[name = string("coreml_update_state_90")]; tensor scattered_v_17_cast_fp16 = transpose(perm = scattered_v_17_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_36")]; tensor v_cache_53_cast_fp16 = add(x = coreml_update_state_90, y = scattered_v_17_cast_fp16)[name = string("v_cache_53_cast_fp16")]; write_state(data = v_cache_53_cast_fp16, input = v_cache_8)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = v_cache_8)[name = string("coreml_update_state_91")]; tensor var_1782_axes_0 = const()[name = string("op_1782_axes_0"), val = tensor([2])]; tensor var_1782_cast_fp16 = expand_dims(axes = var_1782_axes_0, x = coreml_update_state_89)[name = string("op_1782_cast_fp16")]; tensor k_exp_33_reps_0 = const()[name = string("k_exp_33_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_33_cast_fp16 = tile(reps = k_exp_33_reps_0, x = var_1782_cast_fp16)[name = string("k_exp_33_cast_fp16")]; tensor var_1785 = const()[name = string("op_1785"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_35_cast_fp16 = reshape(shape = var_1785, x = k_exp_33_cast_fp16)[name = string("k_exp_35_cast_fp16")]; tensor var_1787_axes_0 = const()[name = string("op_1787_axes_0"), val = tensor([2])]; tensor var_1787_cast_fp16 = expand_dims(axes = var_1787_axes_0, x = coreml_update_state_91)[name = string("op_1787_cast_fp16")]; tensor v_exp_33_reps_0 = const()[name = string("v_exp_33_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_33_cast_fp16 = tile(reps = v_exp_33_reps_0, x = var_1787_cast_fp16)[name = string("v_exp_33_cast_fp16")]; tensor var_1790 = const()[name = string("op_1790"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_35_cast_fp16 = reshape(shape = var_1790, x = v_exp_33_cast_fp16)[name = string("v_exp_35_cast_fp16")]; bool var_1793_transpose_x_1 = const()[name = string("op_1793_transpose_x_1"), val = bool(false)]; bool var_1793_transpose_y_1 = const()[name = string("op_1793_transpose_y_1"), val = bool(true)]; tensor var_1793_cast_fp16 = matmul(transpose_x = var_1793_transpose_x_1, transpose_y = var_1793_transpose_y_1, x = q_17_cast_fp16, y = k_exp_35_cast_fp16)[name = string("op_1793_cast_fp16")]; fp16 var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_33_cast_fp16 = mul(x = var_1793_cast_fp16, y = var_1794_to_fp16)[name = string("attn_33_cast_fp16")]; tensor input_81_cast_fp16 = add(x = attn_33_cast_fp16, y = attention_mask_to_fp16)[name = string("input_81_cast_fp16")]; tensor attn_35_cast_fp16 = softmax(axis = var_1651, x = input_81_cast_fp16)[name = string("attn_35_cast_fp16")]; bool out_17_transpose_x_0 = const()[name = string("out_17_transpose_x_0"), val = bool(false)]; bool out_17_transpose_y_0 = const()[name = string("out_17_transpose_y_0"), val = bool(false)]; tensor out_17_cast_fp16 = matmul(transpose_x = out_17_transpose_x_0, transpose_y = out_17_transpose_y_0, x = attn_35_cast_fp16, y = v_exp_35_cast_fp16)[name = string("out_17_cast_fp16")]; tensor var_1799_perm_0 = const()[name = string("op_1799_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1800 = const()[name = string("op_1800"), val = tensor([1, 128, -1])]; tensor var_1799_cast_fp16 = transpose(perm = var_1799_perm_0, x = out_17_cast_fp16)[name = string("transpose_35")]; tensor input_83_cast_fp16 = reshape(shape = var_1800, x = var_1799_cast_fp16)[name = string("input_83_cast_fp16")]; tensor layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132212800))))[name = string("layers_8_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor x_227_cast_fp16 = add(x = x_207_cast_fp16, y = linear_59_cast_fp16)[name = string("x_227_cast_fp16")]; fp16 var_1650_promoted_3_to_fp16 = const()[name = string("op_1650_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_1807_cast_fp16 = pow(x = x_227_cast_fp16, y = var_1650_promoted_3_to_fp16)[name = string("op_1807_cast_fp16")]; tensor var_1809_axes_0 = const()[name = string("op_1809_axes_0"), val = tensor([-1])]; bool var_1809_keep_dims_0 = const()[name = string("op_1809_keep_dims_0"), val = bool(true)]; tensor var_1809_cast_fp16 = reduce_mean(axes = var_1809_axes_0, keep_dims = var_1809_keep_dims_0, x = var_1807_cast_fp16)[name = string("op_1809_cast_fp16")]; fp16 var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1811_cast_fp16 = add(x = var_1809_cast_fp16, y = var_1810_to_fp16)[name = string("op_1811_cast_fp16")]; fp32 norm_71_epsilon_0 = const()[name = string("norm_71_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_71_cast_fp16 = rsqrt(epsilon = norm_71_epsilon_0, x = var_1811_cast_fp16)[name = string("norm_71_cast_fp16")]; tensor var_1813_cast_fp16 = mul(x = x_227_cast_fp16, y = norm_71_cast_fp16)[name = string("op_1813_cast_fp16")]; tensor layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132213376)))]; tensor var_1814_cast_fp16 = mul(x = var_1813_cast_fp16, y = layers_8_post_attention_layernorm_weight_to_fp16)[name = string("op_1814_cast_fp16")]; tensor layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132215488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361280))))[name = string("layers_8_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = var_1814_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_1824_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1824_cast_fp16")]; tensor layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138507648))))[name = string("layers_8_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_up_proj_weight_to_fp16_palettized, x = var_1814_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_89_cast_fp16 = mul(x = var_1824_cast_fp16, y = linear_61_cast_fp16)[name = string("input_89_cast_fp16")]; tensor layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138508224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654016))))[name = string("layers_8_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor x_233_cast_fp16 = add(x = x_227_cast_fp16, y = linear_62_cast_fp16)[name = string("x_233_cast_fp16")]; int32 var_1845 = const()[name = string("op_1845"), val = int32(-1)]; fp16 var_1844_promoted_to_fp16 = const()[name = string("op_1844_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1854_cast_fp16 = pow(x = x_233_cast_fp16, y = var_1844_promoted_to_fp16)[name = string("op_1854_cast_fp16")]; tensor var_1856_axes_0 = const()[name = string("op_1856_axes_0"), val = tensor([-1])]; bool var_1856_keep_dims_0 = const()[name = string("op_1856_keep_dims_0"), val = bool(true)]; tensor var_1856_cast_fp16 = reduce_mean(axes = var_1856_axes_0, keep_dims = var_1856_keep_dims_0, x = var_1854_cast_fp16)[name = string("op_1856_cast_fp16")]; fp16 var_1857_to_fp16 = const()[name = string("op_1857_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1858_cast_fp16 = add(x = var_1856_cast_fp16, y = var_1857_to_fp16)[name = string("op_1858_cast_fp16")]; fp32 norm_73_epsilon_0 = const()[name = string("norm_73_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_73_cast_fp16 = rsqrt(epsilon = norm_73_epsilon_0, x = var_1858_cast_fp16)[name = string("norm_73_cast_fp16")]; tensor var_1860_cast_fp16 = mul(x = x_233_cast_fp16, y = norm_73_cast_fp16)[name = string("op_1860_cast_fp16")]; tensor layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654592)))]; tensor var_1861_cast_fp16 = mul(x = var_1860_cast_fp16, y = layers_9_input_layernorm_weight_to_fp16)[name = string("op_1861_cast_fp16")]; tensor layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141656704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143753920))))[name = string("layers_9_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor var_1877 = const()[name = string("op_1877"), val = tensor([1, 128, 16, 128])]; tensor var_1878_cast_fp16 = reshape(shape = var_1877, x = linear_63_cast_fp16)[name = string("op_1878_cast_fp16")]; tensor x_239_perm_0 = const()[name = string("x_239_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143754496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803136))))[name = string("layers_9_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor var_1882 = const()[name = string("op_1882"), val = tensor([1, 128, 8, 128])]; tensor var_1883_cast_fp16 = reshape(shape = var_1882, x = linear_64_cast_fp16)[name = string("op_1883_cast_fp16")]; tensor x_243_perm_0 = const()[name = string("x_243_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852352))))[name = string("layers_9_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor var_1887 = const()[name = string("op_1887"), val = tensor([1, 128, 8, 128])]; tensor var_1888_cast_fp16 = reshape(shape = var_1887, x = linear_65_cast_fp16)[name = string("op_1888_cast_fp16")]; tensor transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_1844_promoted_1_to_fp16 = const()[name = string("op_1844_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_239_cast_fp16 = transpose(perm = x_239_perm_0, x = var_1878_cast_fp16)[name = string("transpose_34")]; tensor var_1892_cast_fp16 = pow(x = x_239_cast_fp16, y = var_1844_promoted_1_to_fp16)[name = string("op_1892_cast_fp16")]; tensor var_1894_axes_0 = const()[name = string("op_1894_axes_0"), val = tensor([-1])]; bool var_1894_keep_dims_0 = const()[name = string("op_1894_keep_dims_0"), val = bool(true)]; tensor var_1894_cast_fp16 = reduce_mean(axes = var_1894_axes_0, keep_dims = var_1894_keep_dims_0, x = var_1892_cast_fp16)[name = string("op_1894_cast_fp16")]; fp16 var_1895_to_fp16 = const()[name = string("op_1895_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1896_cast_fp16 = add(x = var_1894_cast_fp16, y = var_1895_to_fp16)[name = string("op_1896_cast_fp16")]; fp32 norm_75_epsilon_0 = const()[name = string("norm_75_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_75_cast_fp16 = rsqrt(epsilon = norm_75_epsilon_0, x = var_1896_cast_fp16)[name = string("norm_75_cast_fp16")]; tensor var_1898_cast_fp16 = mul(x = x_239_cast_fp16, y = norm_75_cast_fp16)[name = string("op_1898_cast_fp16")]; tensor layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852928)))]; tensor var_1899_cast_fp16 = mul(x = var_1898_cast_fp16, y = layers_9_self_attn_q_norm_weight_to_fp16)[name = string("op_1899_cast_fp16")]; fp16 var_1844_promoted_2_to_fp16 = const()[name = string("op_1844_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_243_cast_fp16 = transpose(perm = x_243_perm_0, x = var_1883_cast_fp16)[name = string("transpose_33")]; tensor var_1903_cast_fp16 = pow(x = x_243_cast_fp16, y = var_1844_promoted_2_to_fp16)[name = string("op_1903_cast_fp16")]; tensor var_1905_axes_0 = const()[name = string("op_1905_axes_0"), val = tensor([-1])]; bool var_1905_keep_dims_0 = const()[name = string("op_1905_keep_dims_0"), val = bool(true)]; tensor var_1905_cast_fp16 = reduce_mean(axes = var_1905_axes_0, keep_dims = var_1905_keep_dims_0, x = var_1903_cast_fp16)[name = string("op_1905_cast_fp16")]; fp16 var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1907_cast_fp16 = add(x = var_1905_cast_fp16, y = var_1906_to_fp16)[name = string("op_1907_cast_fp16")]; fp32 norm_77_epsilon_0 = const()[name = string("norm_77_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_77_cast_fp16 = rsqrt(epsilon = norm_77_epsilon_0, x = var_1907_cast_fp16)[name = string("norm_77_cast_fp16")]; tensor var_1909_cast_fp16 = mul(x = x_243_cast_fp16, y = norm_77_cast_fp16)[name = string("op_1909_cast_fp16")]; tensor layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853248)))]; tensor var_1910_cast_fp16 = mul(x = var_1909_cast_fp16, y = layers_9_self_attn_k_norm_weight_to_fp16)[name = string("op_1910_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_1899_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_1899_cast_fp16)[name = string("x2_37_cast_fp16")]; tensor var_1931_cast_fp16 = mul(x = x1_37_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1931_cast_fp16")]; tensor var_1932_cast_fp16 = mul(x = x2_37_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1932_cast_fp16")]; tensor var_1933_cast_fp16 = sub(x = var_1931_cast_fp16, y = var_1932_cast_fp16)[name = string("op_1933_cast_fp16")]; tensor var_1934_cast_fp16 = mul(x = x2_37_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1934_cast_fp16")]; tensor var_1935_cast_fp16 = mul(x = x1_37_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1935_cast_fp16")]; tensor var_1936_cast_fp16 = add(x = var_1934_cast_fp16, y = var_1935_cast_fp16)[name = string("op_1936_cast_fp16")]; bool q_19_interleave_0 = const()[name = string("q_19_interleave_0"), val = bool(false)]; tensor q_19_cast_fp16 = concat(axis = var_1845, interleave = q_19_interleave_0, values = (var_1933_cast_fp16, var_1936_cast_fp16))[name = string("q_19_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_1910_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_1910_cast_fp16)[name = string("x2_39_cast_fp16")]; tensor var_1958_cast_fp16 = mul(x = x1_39_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1958_cast_fp16")]; tensor var_1959_cast_fp16 = mul(x = x2_39_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1959_cast_fp16")]; tensor var_1960_cast_fp16 = sub(x = var_1958_cast_fp16, y = var_1959_cast_fp16)[name = string("op_1960_cast_fp16")]; tensor var_1961_cast_fp16 = mul(x = x2_39_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1961_cast_fp16")]; tensor var_1962_cast_fp16 = mul(x = x1_39_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1962_cast_fp16")]; tensor var_1963_cast_fp16 = add(x = var_1961_cast_fp16, y = var_1962_cast_fp16)[name = string("op_1963_cast_fp16")]; bool var_1965_interleave_0 = const()[name = string("op_1965_interleave_0"), val = bool(false)]; tensor var_1965_cast_fp16 = concat(axis = var_1845, interleave = var_1965_interleave_0, values = (var_1960_cast_fp16, var_1963_cast_fp16))[name = string("op_1965_cast_fp16")]; tensor transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_166 = const()[name = string("concat_166"), val = tensor([128, 1024])]; tensor transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = var_1965_cast_fp16)[name = string("transpose_32")]; tensor reshape_55_cast_fp16 = reshape(shape = concat_166, x = transpose_37_cast_fp16)[name = string("reshape_55_cast_fp16")]; bool matmul_18_transpose_x_1 = const()[name = string("matmul_18_transpose_x_1"), val = bool(true)]; bool matmul_18_transpose_y_1 = const()[name = string("matmul_18_transpose_y_1"), val = bool(false)]; tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_1, transpose_y = matmul_18_transpose_y_1, x = var_68_to_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")]; tensor concat_169 = const()[name = string("concat_169"), val = tensor([1024, 1, 8, 128])]; tensor reshape_56_cast_fp16 = reshape(shape = concat_169, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")]; tensor scattered_k_19_perm_0 = const()[name = string("scattered_k_19_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_174 = const()[name = string("concat_174"), val = tensor([128, 1024])]; tensor transpose_65_cast_fp16 = transpose(perm = transpose_65_perm_0, x = var_1888_cast_fp16)[name = string("transpose_31")]; tensor reshape_58_cast_fp16 = reshape(shape = concat_174, x = transpose_65_cast_fp16)[name = string("reshape_58_cast_fp16")]; bool matmul_19_transpose_x_1 = const()[name = string("matmul_19_transpose_x_1"), val = bool(true)]; bool matmul_19_transpose_y_1 = const()[name = string("matmul_19_transpose_y_1"), val = bool(false)]; tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_1, transpose_y = matmul_19_transpose_y_1, x = var_68_to_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")]; tensor concat_177 = const()[name = string("concat_177"), val = tensor([1024, 1, 8, 128])]; tensor reshape_59_cast_fp16 = reshape(shape = concat_177, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")]; tensor scattered_v_19_perm_0 = const()[name = string("scattered_v_19_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_18 = read_state(input = k_cache_9)[name = string("read_state_18")]; tensor k_cache_57_cast_fp16 = mul(x = read_state_18, y = var_224_cast_fp16)[name = string("k_cache_57_cast_fp16")]; write_state(data = k_cache_57_cast_fp16, input = k_cache_9)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = k_cache_9)[name = string("coreml_update_state_92")]; tensor scattered_k_19_cast_fp16 = transpose(perm = scattered_k_19_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_30")]; tensor k_cache_59_cast_fp16 = add(x = coreml_update_state_92, y = scattered_k_19_cast_fp16)[name = string("k_cache_59_cast_fp16")]; write_state(data = k_cache_59_cast_fp16, input = k_cache_9)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = k_cache_9)[name = string("coreml_update_state_93")]; tensor read_state_19 = read_state(input = v_cache_9)[name = string("read_state_19")]; tensor v_cache_57_cast_fp16 = mul(x = read_state_19, y = var_224_cast_fp16)[name = string("v_cache_57_cast_fp16")]; write_state(data = v_cache_57_cast_fp16, input = v_cache_9)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_94 = read_state(input = v_cache_9)[name = string("coreml_update_state_94")]; tensor scattered_v_19_cast_fp16 = transpose(perm = scattered_v_19_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_29")]; tensor v_cache_59_cast_fp16 = add(x = coreml_update_state_94, y = scattered_v_19_cast_fp16)[name = string("v_cache_59_cast_fp16")]; write_state(data = v_cache_59_cast_fp16, input = v_cache_9)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_95 = read_state(input = v_cache_9)[name = string("coreml_update_state_95")]; tensor var_1976_axes_0 = const()[name = string("op_1976_axes_0"), val = tensor([2])]; tensor var_1976_cast_fp16 = expand_dims(axes = var_1976_axes_0, x = coreml_update_state_93)[name = string("op_1976_cast_fp16")]; tensor k_exp_37_reps_0 = const()[name = string("k_exp_37_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_37_cast_fp16 = tile(reps = k_exp_37_reps_0, x = var_1976_cast_fp16)[name = string("k_exp_37_cast_fp16")]; tensor var_1979 = const()[name = string("op_1979"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_39_cast_fp16 = reshape(shape = var_1979, x = k_exp_37_cast_fp16)[name = string("k_exp_39_cast_fp16")]; tensor var_1981_axes_0 = const()[name = string("op_1981_axes_0"), val = tensor([2])]; tensor var_1981_cast_fp16 = expand_dims(axes = var_1981_axes_0, x = coreml_update_state_95)[name = string("op_1981_cast_fp16")]; tensor v_exp_37_reps_0 = const()[name = string("v_exp_37_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_37_cast_fp16 = tile(reps = v_exp_37_reps_0, x = var_1981_cast_fp16)[name = string("v_exp_37_cast_fp16")]; tensor var_1984 = const()[name = string("op_1984"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_39_cast_fp16 = reshape(shape = var_1984, x = v_exp_37_cast_fp16)[name = string("v_exp_39_cast_fp16")]; bool var_1987_transpose_x_1 = const()[name = string("op_1987_transpose_x_1"), val = bool(false)]; bool var_1987_transpose_y_1 = const()[name = string("op_1987_transpose_y_1"), val = bool(true)]; tensor var_1987_cast_fp16 = matmul(transpose_x = var_1987_transpose_x_1, transpose_y = var_1987_transpose_y_1, x = q_19_cast_fp16, y = k_exp_39_cast_fp16)[name = string("op_1987_cast_fp16")]; fp16 var_1988_to_fp16 = const()[name = string("op_1988_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_37_cast_fp16 = mul(x = var_1987_cast_fp16, y = var_1988_to_fp16)[name = string("attn_37_cast_fp16")]; tensor input_91_cast_fp16 = add(x = attn_37_cast_fp16, y = attention_mask_to_fp16)[name = string("input_91_cast_fp16")]; tensor attn_39_cast_fp16 = softmax(axis = var_1845, x = input_91_cast_fp16)[name = string("attn_39_cast_fp16")]; bool out_19_transpose_x_0 = const()[name = string("out_19_transpose_x_0"), val = bool(false)]; bool out_19_transpose_y_0 = const()[name = string("out_19_transpose_y_0"), val = bool(false)]; tensor out_19_cast_fp16 = matmul(transpose_x = out_19_transpose_x_0, transpose_y = out_19_transpose_y_0, x = attn_39_cast_fp16, y = v_exp_39_cast_fp16)[name = string("out_19_cast_fp16")]; tensor var_1993_perm_0 = const()[name = string("op_1993_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1994 = const()[name = string("op_1994"), val = tensor([1, 128, -1])]; tensor var_1993_cast_fp16 = transpose(perm = var_1993_perm_0, x = out_19_cast_fp16)[name = string("transpose_28")]; tensor input_93_cast_fp16 = reshape(shape = var_1994, x = var_1993_cast_fp16)[name = string("input_93_cast_fp16")]; tensor layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147950784))))[name = string("layers_9_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor x_253_cast_fp16 = add(x = x_233_cast_fp16, y = linear_66_cast_fp16)[name = string("x_253_cast_fp16")]; fp16 var_1844_promoted_3_to_fp16 = const()[name = string("op_1844_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2001_cast_fp16 = pow(x = x_253_cast_fp16, y = var_1844_promoted_3_to_fp16)[name = string("op_2001_cast_fp16")]; tensor var_2003_axes_0 = const()[name = string("op_2003_axes_0"), val = tensor([-1])]; bool var_2003_keep_dims_0 = const()[name = string("op_2003_keep_dims_0"), val = bool(true)]; tensor var_2003_cast_fp16 = reduce_mean(axes = var_2003_axes_0, keep_dims = var_2003_keep_dims_0, x = var_2001_cast_fp16)[name = string("op_2003_cast_fp16")]; fp16 var_2004_to_fp16 = const()[name = string("op_2004_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2005_cast_fp16 = add(x = var_2003_cast_fp16, y = var_2004_to_fp16)[name = string("op_2005_cast_fp16")]; fp32 norm_79_epsilon_0 = const()[name = string("norm_79_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_79_cast_fp16 = rsqrt(epsilon = norm_79_epsilon_0, x = var_2005_cast_fp16)[name = string("norm_79_cast_fp16")]; tensor var_2007_cast_fp16 = mul(x = x_253_cast_fp16, y = norm_79_cast_fp16)[name = string("op_2007_cast_fp16")]; tensor layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147951360)))]; tensor var_2008_cast_fp16 = mul(x = var_2007_cast_fp16, y = layers_9_post_attention_layernorm_weight_to_fp16)[name = string("op_2008_cast_fp16")]; tensor layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147953472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099264))))[name = string("layers_9_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = var_2008_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_2018_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_2018_cast_fp16")]; tensor layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154245632))))[name = string("layers_9_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_up_proj_weight_to_fp16_palettized, x = var_2008_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_99_cast_fp16 = mul(x = var_2018_cast_fp16, y = linear_68_cast_fp16)[name = string("input_99_cast_fp16")]; tensor layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154246208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392000))))[name = string("layers_9_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_69_cast_fp16)[name = string("x_259_cast_fp16")]; int32 var_2039 = const()[name = string("op_2039"), val = int32(-1)]; fp16 var_2038_promoted_to_fp16 = const()[name = string("op_2038_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2048_cast_fp16 = pow(x = x_259_cast_fp16, y = var_2038_promoted_to_fp16)[name = string("op_2048_cast_fp16")]; tensor var_2050_axes_0 = const()[name = string("op_2050_axes_0"), val = tensor([-1])]; bool var_2050_keep_dims_0 = const()[name = string("op_2050_keep_dims_0"), val = bool(true)]; tensor var_2050_cast_fp16 = reduce_mean(axes = var_2050_axes_0, keep_dims = var_2050_keep_dims_0, x = var_2048_cast_fp16)[name = string("op_2050_cast_fp16")]; fp16 var_2051_to_fp16 = const()[name = string("op_2051_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2052_cast_fp16 = add(x = var_2050_cast_fp16, y = var_2051_to_fp16)[name = string("op_2052_cast_fp16")]; fp32 norm_81_epsilon_0 = const()[name = string("norm_81_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_81_cast_fp16 = rsqrt(epsilon = norm_81_epsilon_0, x = var_2052_cast_fp16)[name = string("norm_81_cast_fp16")]; tensor var_2054_cast_fp16 = mul(x = x_259_cast_fp16, y = norm_81_cast_fp16)[name = string("op_2054_cast_fp16")]; tensor layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392576)))]; tensor var_2055_cast_fp16 = mul(x = var_2054_cast_fp16, y = layers_10_input_layernorm_weight_to_fp16)[name = string("op_2055_cast_fp16")]; tensor layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157394688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159491904))))[name = string("layers_10_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = var_2055_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor var_2071 = const()[name = string("op_2071"), val = tensor([1, 128, 16, 128])]; tensor var_2072_cast_fp16 = reshape(shape = var_2071, x = linear_70_cast_fp16)[name = string("op_2072_cast_fp16")]; tensor x_265_perm_0 = const()[name = string("x_265_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159492480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541120))))[name = string("layers_10_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = var_2055_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor var_2076 = const()[name = string("op_2076"), val = tensor([1, 128, 8, 128])]; tensor var_2077_cast_fp16 = reshape(shape = var_2076, x = linear_71_cast_fp16)[name = string("op_2077_cast_fp16")]; tensor x_269_perm_0 = const()[name = string("x_269_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590336))))[name = string("layers_10_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = var_2055_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor var_2081 = const()[name = string("op_2081"), val = tensor([1, 128, 8, 128])]; tensor var_2082_cast_fp16 = reshape(shape = var_2081, x = linear_72_cast_fp16)[name = string("op_2082_cast_fp16")]; tensor transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_2038_promoted_1_to_fp16 = const()[name = string("op_2038_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_265_cast_fp16 = transpose(perm = x_265_perm_0, x = var_2072_cast_fp16)[name = string("transpose_27")]; tensor var_2086_cast_fp16 = pow(x = x_265_cast_fp16, y = var_2038_promoted_1_to_fp16)[name = string("op_2086_cast_fp16")]; tensor var_2088_axes_0 = const()[name = string("op_2088_axes_0"), val = tensor([-1])]; bool var_2088_keep_dims_0 = const()[name = string("op_2088_keep_dims_0"), val = bool(true)]; tensor var_2088_cast_fp16 = reduce_mean(axes = var_2088_axes_0, keep_dims = var_2088_keep_dims_0, x = var_2086_cast_fp16)[name = string("op_2088_cast_fp16")]; fp16 var_2089_to_fp16 = const()[name = string("op_2089_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2090_cast_fp16 = add(x = var_2088_cast_fp16, y = var_2089_to_fp16)[name = string("op_2090_cast_fp16")]; fp32 norm_83_epsilon_0 = const()[name = string("norm_83_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_83_cast_fp16 = rsqrt(epsilon = norm_83_epsilon_0, x = var_2090_cast_fp16)[name = string("norm_83_cast_fp16")]; tensor var_2092_cast_fp16 = mul(x = x_265_cast_fp16, y = norm_83_cast_fp16)[name = string("op_2092_cast_fp16")]; tensor layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590912)))]; tensor var_2093_cast_fp16 = mul(x = var_2092_cast_fp16, y = layers_10_self_attn_q_norm_weight_to_fp16)[name = string("op_2093_cast_fp16")]; fp16 var_2038_promoted_2_to_fp16 = const()[name = string("op_2038_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_269_cast_fp16 = transpose(perm = x_269_perm_0, x = var_2077_cast_fp16)[name = string("transpose_26")]; tensor var_2097_cast_fp16 = pow(x = x_269_cast_fp16, y = var_2038_promoted_2_to_fp16)[name = string("op_2097_cast_fp16")]; tensor var_2099_axes_0 = const()[name = string("op_2099_axes_0"), val = tensor([-1])]; bool var_2099_keep_dims_0 = const()[name = string("op_2099_keep_dims_0"), val = bool(true)]; tensor var_2099_cast_fp16 = reduce_mean(axes = var_2099_axes_0, keep_dims = var_2099_keep_dims_0, x = var_2097_cast_fp16)[name = string("op_2099_cast_fp16")]; fp16 var_2100_to_fp16 = const()[name = string("op_2100_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2101_cast_fp16 = add(x = var_2099_cast_fp16, y = var_2100_to_fp16)[name = string("op_2101_cast_fp16")]; fp32 norm_85_epsilon_0 = const()[name = string("norm_85_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_85_cast_fp16 = rsqrt(epsilon = norm_85_epsilon_0, x = var_2101_cast_fp16)[name = string("norm_85_cast_fp16")]; tensor var_2103_cast_fp16 = mul(x = x_269_cast_fp16, y = norm_85_cast_fp16)[name = string("op_2103_cast_fp16")]; tensor layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591232)))]; tensor var_2104_cast_fp16 = mul(x = var_2103_cast_fp16, y = layers_10_self_attn_k_norm_weight_to_fp16)[name = string("op_2104_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_2093_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_2093_cast_fp16)[name = string("x2_41_cast_fp16")]; tensor var_2125_cast_fp16 = mul(x = x1_41_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2125_cast_fp16")]; tensor var_2126_cast_fp16 = mul(x = x2_41_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2126_cast_fp16")]; tensor var_2127_cast_fp16 = sub(x = var_2125_cast_fp16, y = var_2126_cast_fp16)[name = string("op_2127_cast_fp16")]; tensor var_2128_cast_fp16 = mul(x = x2_41_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2128_cast_fp16")]; tensor var_2129_cast_fp16 = mul(x = x1_41_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2129_cast_fp16")]; tensor var_2130_cast_fp16 = add(x = var_2128_cast_fp16, y = var_2129_cast_fp16)[name = string("op_2130_cast_fp16")]; bool q_21_interleave_0 = const()[name = string("q_21_interleave_0"), val = bool(false)]; tensor q_21_cast_fp16 = concat(axis = var_2039, interleave = q_21_interleave_0, values = (var_2127_cast_fp16, var_2130_cast_fp16))[name = string("q_21_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_2104_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_2104_cast_fp16)[name = string("x2_43_cast_fp16")]; tensor var_2152_cast_fp16 = mul(x = x1_43_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2152_cast_fp16")]; tensor var_2153_cast_fp16 = mul(x = x2_43_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2153_cast_fp16")]; tensor var_2154_cast_fp16 = sub(x = var_2152_cast_fp16, y = var_2153_cast_fp16)[name = string("op_2154_cast_fp16")]; tensor var_2155_cast_fp16 = mul(x = x2_43_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2155_cast_fp16")]; tensor var_2156_cast_fp16 = mul(x = x1_43_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2156_cast_fp16")]; tensor var_2157_cast_fp16 = add(x = var_2155_cast_fp16, y = var_2156_cast_fp16)[name = string("op_2157_cast_fp16")]; bool var_2159_interleave_0 = const()[name = string("op_2159_interleave_0"), val = bool(false)]; tensor var_2159_cast_fp16 = concat(axis = var_2039, interleave = var_2159_interleave_0, values = (var_2154_cast_fp16, var_2157_cast_fp16))[name = string("op_2159_cast_fp16")]; tensor transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_184 = const()[name = string("concat_184"), val = tensor([128, 1024])]; tensor transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = var_2159_cast_fp16)[name = string("transpose_25")]; tensor reshape_61_cast_fp16 = reshape(shape = concat_184, x = transpose_41_cast_fp16)[name = string("reshape_61_cast_fp16")]; bool matmul_20_transpose_x_1 = const()[name = string("matmul_20_transpose_x_1"), val = bool(true)]; bool matmul_20_transpose_y_1 = const()[name = string("matmul_20_transpose_y_1"), val = bool(false)]; tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_1, transpose_y = matmul_20_transpose_y_1, x = var_68_to_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")]; tensor concat_187 = const()[name = string("concat_187"), val = tensor([1024, 1, 8, 128])]; tensor reshape_62_cast_fp16 = reshape(shape = concat_187, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")]; tensor scattered_k_21_perm_0 = const()[name = string("scattered_k_21_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_192 = const()[name = string("concat_192"), val = tensor([128, 1024])]; tensor transpose_66_cast_fp16 = transpose(perm = transpose_66_perm_0, x = var_2082_cast_fp16)[name = string("transpose_24")]; tensor reshape_64_cast_fp16 = reshape(shape = concat_192, x = transpose_66_cast_fp16)[name = string("reshape_64_cast_fp16")]; bool matmul_21_transpose_x_1 = const()[name = string("matmul_21_transpose_x_1"), val = bool(true)]; bool matmul_21_transpose_y_1 = const()[name = string("matmul_21_transpose_y_1"), val = bool(false)]; tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_1, transpose_y = matmul_21_transpose_y_1, x = var_68_to_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")]; tensor concat_195 = const()[name = string("concat_195"), val = tensor([1024, 1, 8, 128])]; tensor reshape_65_cast_fp16 = reshape(shape = concat_195, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")]; tensor scattered_v_21_perm_0 = const()[name = string("scattered_v_21_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_20 = read_state(input = k_cache_10)[name = string("read_state_20")]; tensor k_cache_63_cast_fp16 = mul(x = read_state_20, y = var_224_cast_fp16)[name = string("k_cache_63_cast_fp16")]; write_state(data = k_cache_63_cast_fp16, input = k_cache_10)[name = string("coreml_update_state_96_write_state")]; tensor coreml_update_state_96 = read_state(input = k_cache_10)[name = string("coreml_update_state_96")]; tensor scattered_k_21_cast_fp16 = transpose(perm = scattered_k_21_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_23")]; tensor k_cache_65_cast_fp16 = add(x = coreml_update_state_96, y = scattered_k_21_cast_fp16)[name = string("k_cache_65_cast_fp16")]; write_state(data = k_cache_65_cast_fp16, input = k_cache_10)[name = string("coreml_update_state_97_write_state")]; tensor coreml_update_state_97 = read_state(input = k_cache_10)[name = string("coreml_update_state_97")]; tensor read_state_21 = read_state(input = v_cache_10)[name = string("read_state_21")]; tensor v_cache_63_cast_fp16 = mul(x = read_state_21, y = var_224_cast_fp16)[name = string("v_cache_63_cast_fp16")]; write_state(data = v_cache_63_cast_fp16, input = v_cache_10)[name = string("coreml_update_state_98_write_state")]; tensor coreml_update_state_98 = read_state(input = v_cache_10)[name = string("coreml_update_state_98")]; tensor scattered_v_21_cast_fp16 = transpose(perm = scattered_v_21_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_22")]; tensor v_cache_65_cast_fp16 = add(x = coreml_update_state_98, y = scattered_v_21_cast_fp16)[name = string("v_cache_65_cast_fp16")]; write_state(data = v_cache_65_cast_fp16, input = v_cache_10)[name = string("coreml_update_state_99_write_state")]; tensor coreml_update_state_99 = read_state(input = v_cache_10)[name = string("coreml_update_state_99")]; tensor var_2170_axes_0 = const()[name = string("op_2170_axes_0"), val = tensor([2])]; tensor var_2170_cast_fp16 = expand_dims(axes = var_2170_axes_0, x = coreml_update_state_97)[name = string("op_2170_cast_fp16")]; tensor k_exp_41_reps_0 = const()[name = string("k_exp_41_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_41_cast_fp16 = tile(reps = k_exp_41_reps_0, x = var_2170_cast_fp16)[name = string("k_exp_41_cast_fp16")]; tensor var_2173 = const()[name = string("op_2173"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_43_cast_fp16 = reshape(shape = var_2173, x = k_exp_41_cast_fp16)[name = string("k_exp_43_cast_fp16")]; tensor var_2175_axes_0 = const()[name = string("op_2175_axes_0"), val = tensor([2])]; tensor var_2175_cast_fp16 = expand_dims(axes = var_2175_axes_0, x = coreml_update_state_99)[name = string("op_2175_cast_fp16")]; tensor v_exp_41_reps_0 = const()[name = string("v_exp_41_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_41_cast_fp16 = tile(reps = v_exp_41_reps_0, x = var_2175_cast_fp16)[name = string("v_exp_41_cast_fp16")]; tensor var_2178 = const()[name = string("op_2178"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_43_cast_fp16 = reshape(shape = var_2178, x = v_exp_41_cast_fp16)[name = string("v_exp_43_cast_fp16")]; bool var_2181_transpose_x_1 = const()[name = string("op_2181_transpose_x_1"), val = bool(false)]; bool var_2181_transpose_y_1 = const()[name = string("op_2181_transpose_y_1"), val = bool(true)]; tensor var_2181_cast_fp16 = matmul(transpose_x = var_2181_transpose_x_1, transpose_y = var_2181_transpose_y_1, x = q_21_cast_fp16, y = k_exp_43_cast_fp16)[name = string("op_2181_cast_fp16")]; fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_41_cast_fp16 = mul(x = var_2181_cast_fp16, y = var_2182_to_fp16)[name = string("attn_41_cast_fp16")]; tensor input_101_cast_fp16 = add(x = attn_41_cast_fp16, y = attention_mask_to_fp16)[name = string("input_101_cast_fp16")]; tensor attn_43_cast_fp16 = softmax(axis = var_2039, x = input_101_cast_fp16)[name = string("attn_43_cast_fp16")]; bool out_21_transpose_x_0 = const()[name = string("out_21_transpose_x_0"), val = bool(false)]; bool out_21_transpose_y_0 = const()[name = string("out_21_transpose_y_0"), val = bool(false)]; tensor out_21_cast_fp16 = matmul(transpose_x = out_21_transpose_x_0, transpose_y = out_21_transpose_y_0, x = attn_43_cast_fp16, y = v_exp_43_cast_fp16)[name = string("out_21_cast_fp16")]; tensor var_2187_perm_0 = const()[name = string("op_2187_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2188 = const()[name = string("op_2188"), val = tensor([1, 128, -1])]; tensor var_2187_cast_fp16 = transpose(perm = var_2187_perm_0, x = out_21_cast_fp16)[name = string("transpose_21")]; tensor input_103_cast_fp16 = reshape(shape = var_2188, x = var_2187_cast_fp16)[name = string("input_103_cast_fp16")]; tensor layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163688768))))[name = string("layers_10_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor x_279_cast_fp16 = add(x = x_259_cast_fp16, y = linear_73_cast_fp16)[name = string("x_279_cast_fp16")]; fp16 var_2038_promoted_3_to_fp16 = const()[name = string("op_2038_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2195_cast_fp16 = pow(x = x_279_cast_fp16, y = var_2038_promoted_3_to_fp16)[name = string("op_2195_cast_fp16")]; tensor var_2197_axes_0 = const()[name = string("op_2197_axes_0"), val = tensor([-1])]; bool var_2197_keep_dims_0 = const()[name = string("op_2197_keep_dims_0"), val = bool(true)]; tensor var_2197_cast_fp16 = reduce_mean(axes = var_2197_axes_0, keep_dims = var_2197_keep_dims_0, x = var_2195_cast_fp16)[name = string("op_2197_cast_fp16")]; fp16 var_2198_to_fp16 = const()[name = string("op_2198_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2199_cast_fp16 = add(x = var_2197_cast_fp16, y = var_2198_to_fp16)[name = string("op_2199_cast_fp16")]; fp32 norm_87_epsilon_0 = const()[name = string("norm_87_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_87_cast_fp16 = rsqrt(epsilon = norm_87_epsilon_0, x = var_2199_cast_fp16)[name = string("norm_87_cast_fp16")]; tensor var_2201_cast_fp16 = mul(x = x_279_cast_fp16, y = norm_87_cast_fp16)[name = string("op_2201_cast_fp16")]; tensor layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689344)))]; tensor var_2202_cast_fp16 = mul(x = var_2201_cast_fp16, y = layers_10_post_attention_layernorm_weight_to_fp16)[name = string("op_2202_cast_fp16")]; tensor layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163691456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837248))))[name = string("layers_10_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = var_2202_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2212_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2212_cast_fp16")]; tensor layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169983616))))[name = string("layers_10_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_up_proj_weight_to_fp16_palettized, x = var_2202_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_109_cast_fp16 = mul(x = var_2212_cast_fp16, y = linear_75_cast_fp16)[name = string("input_109_cast_fp16")]; tensor layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169984192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173129984))))[name = string("layers_10_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_76_cast_fp16)[name = string("x_285_cast_fp16")]; int32 var_2233 = const()[name = string("op_2233"), val = int32(-1)]; fp16 var_2232_promoted_to_fp16 = const()[name = string("op_2232_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2242_cast_fp16 = pow(x = x_285_cast_fp16, y = var_2232_promoted_to_fp16)[name = string("op_2242_cast_fp16")]; tensor var_2244_axes_0 = const()[name = string("op_2244_axes_0"), val = tensor([-1])]; bool var_2244_keep_dims_0 = const()[name = string("op_2244_keep_dims_0"), val = bool(true)]; tensor var_2244_cast_fp16 = reduce_mean(axes = var_2244_axes_0, keep_dims = var_2244_keep_dims_0, x = var_2242_cast_fp16)[name = string("op_2244_cast_fp16")]; fp16 var_2245_to_fp16 = const()[name = string("op_2245_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2246_cast_fp16 = add(x = var_2244_cast_fp16, y = var_2245_to_fp16)[name = string("op_2246_cast_fp16")]; fp32 norm_89_epsilon_0 = const()[name = string("norm_89_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_89_cast_fp16 = rsqrt(epsilon = norm_89_epsilon_0, x = var_2246_cast_fp16)[name = string("norm_89_cast_fp16")]; tensor var_2248_cast_fp16 = mul(x = x_285_cast_fp16, y = norm_89_cast_fp16)[name = string("op_2248_cast_fp16")]; tensor layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173130560)))]; tensor var_2249_cast_fp16 = mul(x = var_2248_cast_fp16, y = layers_11_input_layernorm_weight_to_fp16)[name = string("op_2249_cast_fp16")]; tensor layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173132672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175229888))))[name = string("layers_11_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = var_2249_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor var_2265 = const()[name = string("op_2265"), val = tensor([1, 128, 16, 128])]; tensor var_2266_cast_fp16 = reshape(shape = var_2265, x = linear_77_cast_fp16)[name = string("op_2266_cast_fp16")]; tensor x_291_perm_0 = const()[name = string("x_291_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175230464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279104))))[name = string("layers_11_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = var_2249_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor var_2270 = const()[name = string("op_2270"), val = tensor([1, 128, 8, 128])]; tensor var_2271_cast_fp16 = reshape(shape = var_2270, x = linear_78_cast_fp16)[name = string("op_2271_cast_fp16")]; tensor x_295_perm_0 = const()[name = string("x_295_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328320))))[name = string("layers_11_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = var_2249_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor var_2275 = const()[name = string("op_2275"), val = tensor([1, 128, 8, 128])]; tensor var_2276_cast_fp16 = reshape(shape = var_2275, x = linear_79_cast_fp16)[name = string("op_2276_cast_fp16")]; tensor transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_2232_promoted_1_to_fp16 = const()[name = string("op_2232_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_291_cast_fp16 = transpose(perm = x_291_perm_0, x = var_2266_cast_fp16)[name = string("transpose_20")]; tensor var_2280_cast_fp16 = pow(x = x_291_cast_fp16, y = var_2232_promoted_1_to_fp16)[name = string("op_2280_cast_fp16")]; tensor var_2282_axes_0 = const()[name = string("op_2282_axes_0"), val = tensor([-1])]; bool var_2282_keep_dims_0 = const()[name = string("op_2282_keep_dims_0"), val = bool(true)]; tensor var_2282_cast_fp16 = reduce_mean(axes = var_2282_axes_0, keep_dims = var_2282_keep_dims_0, x = var_2280_cast_fp16)[name = string("op_2282_cast_fp16")]; fp16 var_2283_to_fp16 = const()[name = string("op_2283_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2284_cast_fp16 = add(x = var_2282_cast_fp16, y = var_2283_to_fp16)[name = string("op_2284_cast_fp16")]; fp32 norm_91_epsilon_0 = const()[name = string("norm_91_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_91_cast_fp16 = rsqrt(epsilon = norm_91_epsilon_0, x = var_2284_cast_fp16)[name = string("norm_91_cast_fp16")]; tensor var_2286_cast_fp16 = mul(x = x_291_cast_fp16, y = norm_91_cast_fp16)[name = string("op_2286_cast_fp16")]; tensor layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328896)))]; tensor var_2287_cast_fp16 = mul(x = var_2286_cast_fp16, y = layers_11_self_attn_q_norm_weight_to_fp16)[name = string("op_2287_cast_fp16")]; fp16 var_2232_promoted_2_to_fp16 = const()[name = string("op_2232_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_295_cast_fp16 = transpose(perm = x_295_perm_0, x = var_2271_cast_fp16)[name = string("transpose_19")]; tensor var_2291_cast_fp16 = pow(x = x_295_cast_fp16, y = var_2232_promoted_2_to_fp16)[name = string("op_2291_cast_fp16")]; tensor var_2293_axes_0 = const()[name = string("op_2293_axes_0"), val = tensor([-1])]; bool var_2293_keep_dims_0 = const()[name = string("op_2293_keep_dims_0"), val = bool(true)]; tensor var_2293_cast_fp16 = reduce_mean(axes = var_2293_axes_0, keep_dims = var_2293_keep_dims_0, x = var_2291_cast_fp16)[name = string("op_2293_cast_fp16")]; fp16 var_2294_to_fp16 = const()[name = string("op_2294_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2295_cast_fp16 = add(x = var_2293_cast_fp16, y = var_2294_to_fp16)[name = string("op_2295_cast_fp16")]; fp32 norm_93_epsilon_0 = const()[name = string("norm_93_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_93_cast_fp16 = rsqrt(epsilon = norm_93_epsilon_0, x = var_2295_cast_fp16)[name = string("norm_93_cast_fp16")]; tensor var_2297_cast_fp16 = mul(x = x_295_cast_fp16, y = norm_93_cast_fp16)[name = string("op_2297_cast_fp16")]; tensor layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329216)))]; tensor var_2298_cast_fp16 = mul(x = var_2297_cast_fp16, y = layers_11_self_attn_k_norm_weight_to_fp16)[name = string("op_2298_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_2287_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_2287_cast_fp16)[name = string("x2_45_cast_fp16")]; tensor var_2319_cast_fp16 = mul(x = x1_45_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2319_cast_fp16")]; tensor var_2320_cast_fp16 = mul(x = x2_45_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2320_cast_fp16")]; tensor var_2321_cast_fp16 = sub(x = var_2319_cast_fp16, y = var_2320_cast_fp16)[name = string("op_2321_cast_fp16")]; tensor var_2322_cast_fp16 = mul(x = x2_45_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2322_cast_fp16")]; tensor var_2323_cast_fp16 = mul(x = x1_45_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2323_cast_fp16")]; tensor var_2324_cast_fp16 = add(x = var_2322_cast_fp16, y = var_2323_cast_fp16)[name = string("op_2324_cast_fp16")]; bool q_23_interleave_0 = const()[name = string("q_23_interleave_0"), val = bool(false)]; tensor q_23_cast_fp16 = concat(axis = var_2233, interleave = q_23_interleave_0, values = (var_2321_cast_fp16, var_2324_cast_fp16))[name = string("q_23_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_2298_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_2298_cast_fp16)[name = string("x2_47_cast_fp16")]; tensor var_2346_cast_fp16 = mul(x = x1_47_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2346_cast_fp16")]; tensor var_2347_cast_fp16 = mul(x = x2_47_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2347_cast_fp16")]; tensor var_2348_cast_fp16 = sub(x = var_2346_cast_fp16, y = var_2347_cast_fp16)[name = string("op_2348_cast_fp16")]; tensor var_2349_cast_fp16 = mul(x = x2_47_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2349_cast_fp16")]; tensor var_2350_cast_fp16 = mul(x = x1_47_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2350_cast_fp16")]; tensor var_2351_cast_fp16 = add(x = var_2349_cast_fp16, y = var_2350_cast_fp16)[name = string("op_2351_cast_fp16")]; bool var_2353_interleave_0 = const()[name = string("op_2353_interleave_0"), val = bool(false)]; tensor var_2353_cast_fp16 = concat(axis = var_2233, interleave = var_2353_interleave_0, values = (var_2348_cast_fp16, var_2351_cast_fp16))[name = string("op_2353_cast_fp16")]; tensor transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_202 = const()[name = string("concat_202"), val = tensor([128, 1024])]; tensor transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = var_2353_cast_fp16)[name = string("transpose_18")]; tensor reshape_67_cast_fp16 = reshape(shape = concat_202, x = transpose_45_cast_fp16)[name = string("reshape_67_cast_fp16")]; bool matmul_22_transpose_x_1 = const()[name = string("matmul_22_transpose_x_1"), val = bool(true)]; bool matmul_22_transpose_y_1 = const()[name = string("matmul_22_transpose_y_1"), val = bool(false)]; tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_1, transpose_y = matmul_22_transpose_y_1, x = var_68_to_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")]; tensor concat_205 = const()[name = string("concat_205"), val = tensor([1024, 1, 8, 128])]; tensor reshape_68_cast_fp16 = reshape(shape = concat_205, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")]; tensor scattered_k_23_perm_0 = const()[name = string("scattered_k_23_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_210 = const()[name = string("concat_210"), val = tensor([128, 1024])]; tensor transpose_67_cast_fp16 = transpose(perm = transpose_67_perm_0, x = var_2276_cast_fp16)[name = string("transpose_17")]; tensor reshape_70_cast_fp16 = reshape(shape = concat_210, x = transpose_67_cast_fp16)[name = string("reshape_70_cast_fp16")]; bool matmul_23_transpose_x_1 = const()[name = string("matmul_23_transpose_x_1"), val = bool(true)]; bool matmul_23_transpose_y_1 = const()[name = string("matmul_23_transpose_y_1"), val = bool(false)]; tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_1, transpose_y = matmul_23_transpose_y_1, x = var_68_to_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")]; tensor concat_213 = const()[name = string("concat_213"), val = tensor([1024, 1, 8, 128])]; tensor reshape_71_cast_fp16 = reshape(shape = concat_213, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")]; tensor scattered_v_23_perm_0 = const()[name = string("scattered_v_23_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_22 = read_state(input = k_cache_11)[name = string("read_state_22")]; tensor k_cache_69_cast_fp16 = mul(x = read_state_22, y = var_224_cast_fp16)[name = string("k_cache_69_cast_fp16")]; write_state(data = k_cache_69_cast_fp16, input = k_cache_11)[name = string("coreml_update_state_100_write_state")]; tensor coreml_update_state_100 = read_state(input = k_cache_11)[name = string("coreml_update_state_100")]; tensor scattered_k_23_cast_fp16 = transpose(perm = scattered_k_23_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_16")]; tensor k_cache_71_cast_fp16 = add(x = coreml_update_state_100, y = scattered_k_23_cast_fp16)[name = string("k_cache_71_cast_fp16")]; write_state(data = k_cache_71_cast_fp16, input = k_cache_11)[name = string("coreml_update_state_101_write_state")]; tensor coreml_update_state_101 = read_state(input = k_cache_11)[name = string("coreml_update_state_101")]; tensor read_state_23 = read_state(input = v_cache_11)[name = string("read_state_23")]; tensor v_cache_69_cast_fp16 = mul(x = read_state_23, y = var_224_cast_fp16)[name = string("v_cache_69_cast_fp16")]; write_state(data = v_cache_69_cast_fp16, input = v_cache_11)[name = string("coreml_update_state_102_write_state")]; tensor coreml_update_state_102 = read_state(input = v_cache_11)[name = string("coreml_update_state_102")]; tensor scattered_v_23_cast_fp16 = transpose(perm = scattered_v_23_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_15")]; tensor v_cache_71_cast_fp16 = add(x = coreml_update_state_102, y = scattered_v_23_cast_fp16)[name = string("v_cache_71_cast_fp16")]; write_state(data = v_cache_71_cast_fp16, input = v_cache_11)[name = string("coreml_update_state_103_write_state")]; tensor coreml_update_state_103 = read_state(input = v_cache_11)[name = string("coreml_update_state_103")]; tensor var_2364_axes_0 = const()[name = string("op_2364_axes_0"), val = tensor([2])]; tensor var_2364_cast_fp16 = expand_dims(axes = var_2364_axes_0, x = coreml_update_state_101)[name = string("op_2364_cast_fp16")]; tensor k_exp_45_reps_0 = const()[name = string("k_exp_45_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_45_cast_fp16 = tile(reps = k_exp_45_reps_0, x = var_2364_cast_fp16)[name = string("k_exp_45_cast_fp16")]; tensor var_2367 = const()[name = string("op_2367"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_47_cast_fp16 = reshape(shape = var_2367, x = k_exp_45_cast_fp16)[name = string("k_exp_47_cast_fp16")]; tensor var_2369_axes_0 = const()[name = string("op_2369_axes_0"), val = tensor([2])]; tensor var_2369_cast_fp16 = expand_dims(axes = var_2369_axes_0, x = coreml_update_state_103)[name = string("op_2369_cast_fp16")]; tensor v_exp_45_reps_0 = const()[name = string("v_exp_45_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_45_cast_fp16 = tile(reps = v_exp_45_reps_0, x = var_2369_cast_fp16)[name = string("v_exp_45_cast_fp16")]; tensor var_2372 = const()[name = string("op_2372"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_47_cast_fp16 = reshape(shape = var_2372, x = v_exp_45_cast_fp16)[name = string("v_exp_47_cast_fp16")]; bool var_2375_transpose_x_1 = const()[name = string("op_2375_transpose_x_1"), val = bool(false)]; bool var_2375_transpose_y_1 = const()[name = string("op_2375_transpose_y_1"), val = bool(true)]; tensor var_2375_cast_fp16 = matmul(transpose_x = var_2375_transpose_x_1, transpose_y = var_2375_transpose_y_1, x = q_23_cast_fp16, y = k_exp_47_cast_fp16)[name = string("op_2375_cast_fp16")]; fp16 var_2376_to_fp16 = const()[name = string("op_2376_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_45_cast_fp16 = mul(x = var_2375_cast_fp16, y = var_2376_to_fp16)[name = string("attn_45_cast_fp16")]; tensor input_111_cast_fp16 = add(x = attn_45_cast_fp16, y = attention_mask_to_fp16)[name = string("input_111_cast_fp16")]; tensor attn_47_cast_fp16 = softmax(axis = var_2233, x = input_111_cast_fp16)[name = string("attn_47_cast_fp16")]; bool out_23_transpose_x_0 = const()[name = string("out_23_transpose_x_0"), val = bool(false)]; bool out_23_transpose_y_0 = const()[name = string("out_23_transpose_y_0"), val = bool(false)]; tensor out_23_cast_fp16 = matmul(transpose_x = out_23_transpose_x_0, transpose_y = out_23_transpose_y_0, x = attn_47_cast_fp16, y = v_exp_47_cast_fp16)[name = string("out_23_cast_fp16")]; tensor var_2381_perm_0 = const()[name = string("op_2381_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2382 = const()[name = string("op_2382"), val = tensor([1, 128, -1])]; tensor var_2381_cast_fp16 = transpose(perm = var_2381_perm_0, x = out_23_cast_fp16)[name = string("transpose_14")]; tensor input_113_cast_fp16 = reshape(shape = var_2382, x = var_2381_cast_fp16)[name = string("input_113_cast_fp16")]; tensor layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179426752))))[name = string("layers_11_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor x_305_cast_fp16 = add(x = x_285_cast_fp16, y = linear_80_cast_fp16)[name = string("x_305_cast_fp16")]; fp16 var_2232_promoted_3_to_fp16 = const()[name = string("op_2232_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2389_cast_fp16 = pow(x = x_305_cast_fp16, y = var_2232_promoted_3_to_fp16)[name = string("op_2389_cast_fp16")]; tensor var_2391_axes_0 = const()[name = string("op_2391_axes_0"), val = tensor([-1])]; bool var_2391_keep_dims_0 = const()[name = string("op_2391_keep_dims_0"), val = bool(true)]; tensor var_2391_cast_fp16 = reduce_mean(axes = var_2391_axes_0, keep_dims = var_2391_keep_dims_0, x = var_2389_cast_fp16)[name = string("op_2391_cast_fp16")]; fp16 var_2392_to_fp16 = const()[name = string("op_2392_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2393_cast_fp16 = add(x = var_2391_cast_fp16, y = var_2392_to_fp16)[name = string("op_2393_cast_fp16")]; fp32 norm_95_epsilon_0 = const()[name = string("norm_95_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_95_cast_fp16 = rsqrt(epsilon = norm_95_epsilon_0, x = var_2393_cast_fp16)[name = string("norm_95_cast_fp16")]; tensor var_2395_cast_fp16 = mul(x = x_305_cast_fp16, y = norm_95_cast_fp16)[name = string("op_2395_cast_fp16")]; tensor layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427328)))]; tensor var_2396_cast_fp16 = mul(x = var_2395_cast_fp16, y = layers_11_post_attention_layernorm_weight_to_fp16)[name = string("op_2396_cast_fp16")]; tensor layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179429440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575232))))[name = string("layers_11_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = var_2396_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2406_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2406_cast_fp16")]; tensor layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185721600))))[name = string("layers_11_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_up_proj_weight_to_fp16_palettized, x = var_2396_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_119_cast_fp16 = mul(x = var_2406_cast_fp16, y = linear_82_cast_fp16)[name = string("input_119_cast_fp16")]; tensor layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185722176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188867968))))[name = string("layers_11_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor x_311_cast_fp16 = add(x = x_305_cast_fp16, y = linear_83_cast_fp16)[name = string("x_311_cast_fp16")]; int32 var_2427 = const()[name = string("op_2427"), val = int32(-1)]; fp16 var_2426_promoted_to_fp16 = const()[name = string("op_2426_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2436_cast_fp16 = pow(x = x_311_cast_fp16, y = var_2426_promoted_to_fp16)[name = string("op_2436_cast_fp16")]; tensor var_2438_axes_0 = const()[name = string("op_2438_axes_0"), val = tensor([-1])]; bool var_2438_keep_dims_0 = const()[name = string("op_2438_keep_dims_0"), val = bool(true)]; tensor var_2438_cast_fp16 = reduce_mean(axes = var_2438_axes_0, keep_dims = var_2438_keep_dims_0, x = var_2436_cast_fp16)[name = string("op_2438_cast_fp16")]; fp16 var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2440_cast_fp16 = add(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = string("op_2440_cast_fp16")]; fp32 norm_97_epsilon_0 = const()[name = string("norm_97_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_97_cast_fp16 = rsqrt(epsilon = norm_97_epsilon_0, x = var_2440_cast_fp16)[name = string("norm_97_cast_fp16")]; tensor var_2442_cast_fp16 = mul(x = x_311_cast_fp16, y = norm_97_cast_fp16)[name = string("op_2442_cast_fp16")]; tensor layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188868544)))]; tensor var_2443_cast_fp16 = mul(x = var_2442_cast_fp16, y = layers_12_input_layernorm_weight_to_fp16)[name = string("op_2443_cast_fp16")]; tensor layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188870656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190967872))))[name = string("layers_12_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = var_2443_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor var_2459 = const()[name = string("op_2459"), val = tensor([1, 128, 16, 128])]; tensor var_2460_cast_fp16 = reshape(shape = var_2459, x = linear_84_cast_fp16)[name = string("op_2460_cast_fp16")]; tensor x_317_perm_0 = const()[name = string("x_317_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190968448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017088))))[name = string("layers_12_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = var_2443_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor var_2464 = const()[name = string("op_2464"), val = tensor([1, 128, 8, 128])]; tensor var_2465_cast_fp16 = reshape(shape = var_2464, x = linear_85_cast_fp16)[name = string("op_2465_cast_fp16")]; tensor x_321_perm_0 = const()[name = string("x_321_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066304))))[name = string("layers_12_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = var_2443_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor var_2469 = const()[name = string("op_2469"), val = tensor([1, 128, 8, 128])]; tensor var_2470_cast_fp16 = reshape(shape = var_2469, x = linear_86_cast_fp16)[name = string("op_2470_cast_fp16")]; tensor transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_2426_promoted_1_to_fp16 = const()[name = string("op_2426_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_317_cast_fp16 = transpose(perm = x_317_perm_0, x = var_2460_cast_fp16)[name = string("transpose_13")]; tensor var_2474_cast_fp16 = pow(x = x_317_cast_fp16, y = var_2426_promoted_1_to_fp16)[name = string("op_2474_cast_fp16")]; tensor var_2476_axes_0 = const()[name = string("op_2476_axes_0"), val = tensor([-1])]; bool var_2476_keep_dims_0 = const()[name = string("op_2476_keep_dims_0"), val = bool(true)]; tensor var_2476_cast_fp16 = reduce_mean(axes = var_2476_axes_0, keep_dims = var_2476_keep_dims_0, x = var_2474_cast_fp16)[name = string("op_2476_cast_fp16")]; fp16 var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2478_cast_fp16 = add(x = var_2476_cast_fp16, y = var_2477_to_fp16)[name = string("op_2478_cast_fp16")]; fp32 norm_99_epsilon_0 = const()[name = string("norm_99_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_99_cast_fp16 = rsqrt(epsilon = norm_99_epsilon_0, x = var_2478_cast_fp16)[name = string("norm_99_cast_fp16")]; tensor var_2480_cast_fp16 = mul(x = x_317_cast_fp16, y = norm_99_cast_fp16)[name = string("op_2480_cast_fp16")]; tensor layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066880)))]; tensor var_2481_cast_fp16 = mul(x = var_2480_cast_fp16, y = layers_12_self_attn_q_norm_weight_to_fp16)[name = string("op_2481_cast_fp16")]; fp16 var_2426_promoted_2_to_fp16 = const()[name = string("op_2426_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_321_cast_fp16 = transpose(perm = x_321_perm_0, x = var_2465_cast_fp16)[name = string("transpose_12")]; tensor var_2485_cast_fp16 = pow(x = x_321_cast_fp16, y = var_2426_promoted_2_to_fp16)[name = string("op_2485_cast_fp16")]; tensor var_2487_axes_0 = const()[name = string("op_2487_axes_0"), val = tensor([-1])]; bool var_2487_keep_dims_0 = const()[name = string("op_2487_keep_dims_0"), val = bool(true)]; tensor var_2487_cast_fp16 = reduce_mean(axes = var_2487_axes_0, keep_dims = var_2487_keep_dims_0, x = var_2485_cast_fp16)[name = string("op_2487_cast_fp16")]; fp16 var_2488_to_fp16 = const()[name = string("op_2488_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2489_cast_fp16 = add(x = var_2487_cast_fp16, y = var_2488_to_fp16)[name = string("op_2489_cast_fp16")]; fp32 norm_101_epsilon_0 = const()[name = string("norm_101_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_101_cast_fp16 = rsqrt(epsilon = norm_101_epsilon_0, x = var_2489_cast_fp16)[name = string("norm_101_cast_fp16")]; tensor var_2491_cast_fp16 = mul(x = x_321_cast_fp16, y = norm_101_cast_fp16)[name = string("op_2491_cast_fp16")]; tensor layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067200)))]; tensor var_2492_cast_fp16 = mul(x = var_2491_cast_fp16, y = layers_12_self_attn_k_norm_weight_to_fp16)[name = string("op_2492_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_2481_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_2481_cast_fp16)[name = string("x2_49_cast_fp16")]; tensor var_2513_cast_fp16 = mul(x = x1_49_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2513_cast_fp16")]; tensor var_2514_cast_fp16 = mul(x = x2_49_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2514_cast_fp16")]; tensor var_2515_cast_fp16 = sub(x = var_2513_cast_fp16, y = var_2514_cast_fp16)[name = string("op_2515_cast_fp16")]; tensor var_2516_cast_fp16 = mul(x = x2_49_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2516_cast_fp16")]; tensor var_2517_cast_fp16 = mul(x = x1_49_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2517_cast_fp16")]; tensor var_2518_cast_fp16 = add(x = var_2516_cast_fp16, y = var_2517_cast_fp16)[name = string("op_2518_cast_fp16")]; bool q_25_interleave_0 = const()[name = string("q_25_interleave_0"), val = bool(false)]; tensor q_25_cast_fp16 = concat(axis = var_2427, interleave = q_25_interleave_0, values = (var_2515_cast_fp16, var_2518_cast_fp16))[name = string("q_25_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_2492_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_2492_cast_fp16)[name = string("x2_51_cast_fp16")]; tensor var_2540_cast_fp16 = mul(x = x1_51_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2540_cast_fp16")]; tensor var_2541_cast_fp16 = mul(x = x2_51_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2541_cast_fp16")]; tensor var_2542_cast_fp16 = sub(x = var_2540_cast_fp16, y = var_2541_cast_fp16)[name = string("op_2542_cast_fp16")]; tensor var_2543_cast_fp16 = mul(x = x2_51_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2543_cast_fp16")]; tensor var_2544_cast_fp16 = mul(x = x1_51_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2544_cast_fp16")]; tensor var_2545_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2544_cast_fp16)[name = string("op_2545_cast_fp16")]; bool var_2547_interleave_0 = const()[name = string("op_2547_interleave_0"), val = bool(false)]; tensor var_2547_cast_fp16 = concat(axis = var_2427, interleave = var_2547_interleave_0, values = (var_2542_cast_fp16, var_2545_cast_fp16))[name = string("op_2547_cast_fp16")]; tensor transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_220 = const()[name = string("concat_220"), val = tensor([128, 1024])]; tensor transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = var_2547_cast_fp16)[name = string("transpose_11")]; tensor reshape_73_cast_fp16 = reshape(shape = concat_220, x = transpose_49_cast_fp16)[name = string("reshape_73_cast_fp16")]; bool matmul_24_transpose_x_1 = const()[name = string("matmul_24_transpose_x_1"), val = bool(true)]; bool matmul_24_transpose_y_1 = const()[name = string("matmul_24_transpose_y_1"), val = bool(false)]; tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_1, transpose_y = matmul_24_transpose_y_1, x = var_68_to_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")]; tensor concat_223 = const()[name = string("concat_223"), val = tensor([1024, 1, 8, 128])]; tensor reshape_74_cast_fp16 = reshape(shape = concat_223, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")]; tensor scattered_k_25_perm_0 = const()[name = string("scattered_k_25_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_228 = const()[name = string("concat_228"), val = tensor([128, 1024])]; tensor transpose_68_cast_fp16 = transpose(perm = transpose_68_perm_0, x = var_2470_cast_fp16)[name = string("transpose_10")]; tensor reshape_76_cast_fp16 = reshape(shape = concat_228, x = transpose_68_cast_fp16)[name = string("reshape_76_cast_fp16")]; bool matmul_25_transpose_x_1 = const()[name = string("matmul_25_transpose_x_1"), val = bool(true)]; bool matmul_25_transpose_y_1 = const()[name = string("matmul_25_transpose_y_1"), val = bool(false)]; tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_1, transpose_y = matmul_25_transpose_y_1, x = var_68_to_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")]; tensor concat_231 = const()[name = string("concat_231"), val = tensor([1024, 1, 8, 128])]; tensor reshape_77_cast_fp16 = reshape(shape = concat_231, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")]; tensor scattered_v_25_perm_0 = const()[name = string("scattered_v_25_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_24 = read_state(input = k_cache_12)[name = string("read_state_24")]; tensor k_cache_75_cast_fp16 = mul(x = read_state_24, y = var_224_cast_fp16)[name = string("k_cache_75_cast_fp16")]; write_state(data = k_cache_75_cast_fp16, input = k_cache_12)[name = string("coreml_update_state_104_write_state")]; tensor coreml_update_state_104 = read_state(input = k_cache_12)[name = string("coreml_update_state_104")]; tensor scattered_k_25_cast_fp16 = transpose(perm = scattered_k_25_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_9")]; tensor k_cache_77_cast_fp16 = add(x = coreml_update_state_104, y = scattered_k_25_cast_fp16)[name = string("k_cache_77_cast_fp16")]; write_state(data = k_cache_77_cast_fp16, input = k_cache_12)[name = string("coreml_update_state_105_write_state")]; tensor coreml_update_state_105 = read_state(input = k_cache_12)[name = string("coreml_update_state_105")]; tensor read_state_25 = read_state(input = v_cache_12)[name = string("read_state_25")]; tensor v_cache_75_cast_fp16 = mul(x = read_state_25, y = var_224_cast_fp16)[name = string("v_cache_75_cast_fp16")]; write_state(data = v_cache_75_cast_fp16, input = v_cache_12)[name = string("coreml_update_state_106_write_state")]; tensor coreml_update_state_106 = read_state(input = v_cache_12)[name = string("coreml_update_state_106")]; tensor scattered_v_25_cast_fp16 = transpose(perm = scattered_v_25_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_8")]; tensor v_cache_77_cast_fp16 = add(x = coreml_update_state_106, y = scattered_v_25_cast_fp16)[name = string("v_cache_77_cast_fp16")]; write_state(data = v_cache_77_cast_fp16, input = v_cache_12)[name = string("coreml_update_state_107_write_state")]; tensor coreml_update_state_107 = read_state(input = v_cache_12)[name = string("coreml_update_state_107")]; tensor var_2558_axes_0 = const()[name = string("op_2558_axes_0"), val = tensor([2])]; tensor var_2558_cast_fp16 = expand_dims(axes = var_2558_axes_0, x = coreml_update_state_105)[name = string("op_2558_cast_fp16")]; tensor k_exp_49_reps_0 = const()[name = string("k_exp_49_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_49_cast_fp16 = tile(reps = k_exp_49_reps_0, x = var_2558_cast_fp16)[name = string("k_exp_49_cast_fp16")]; tensor var_2561 = const()[name = string("op_2561"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_51_cast_fp16 = reshape(shape = var_2561, x = k_exp_49_cast_fp16)[name = string("k_exp_51_cast_fp16")]; tensor var_2563_axes_0 = const()[name = string("op_2563_axes_0"), val = tensor([2])]; tensor var_2563_cast_fp16 = expand_dims(axes = var_2563_axes_0, x = coreml_update_state_107)[name = string("op_2563_cast_fp16")]; tensor v_exp_49_reps_0 = const()[name = string("v_exp_49_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_49_cast_fp16 = tile(reps = v_exp_49_reps_0, x = var_2563_cast_fp16)[name = string("v_exp_49_cast_fp16")]; tensor var_2566 = const()[name = string("op_2566"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_51_cast_fp16 = reshape(shape = var_2566, x = v_exp_49_cast_fp16)[name = string("v_exp_51_cast_fp16")]; bool var_2569_transpose_x_1 = const()[name = string("op_2569_transpose_x_1"), val = bool(false)]; bool var_2569_transpose_y_1 = const()[name = string("op_2569_transpose_y_1"), val = bool(true)]; tensor var_2569_cast_fp16 = matmul(transpose_x = var_2569_transpose_x_1, transpose_y = var_2569_transpose_y_1, x = q_25_cast_fp16, y = k_exp_51_cast_fp16)[name = string("op_2569_cast_fp16")]; fp16 var_2570_to_fp16 = const()[name = string("op_2570_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_49_cast_fp16 = mul(x = var_2569_cast_fp16, y = var_2570_to_fp16)[name = string("attn_49_cast_fp16")]; tensor input_121_cast_fp16 = add(x = attn_49_cast_fp16, y = attention_mask_to_fp16)[name = string("input_121_cast_fp16")]; tensor attn_51_cast_fp16 = softmax(axis = var_2427, x = input_121_cast_fp16)[name = string("attn_51_cast_fp16")]; bool out_25_transpose_x_0 = const()[name = string("out_25_transpose_x_0"), val = bool(false)]; bool out_25_transpose_y_0 = const()[name = string("out_25_transpose_y_0"), val = bool(false)]; tensor out_25_cast_fp16 = matmul(transpose_x = out_25_transpose_x_0, transpose_y = out_25_transpose_y_0, x = attn_51_cast_fp16, y = v_exp_51_cast_fp16)[name = string("out_25_cast_fp16")]; tensor var_2575_perm_0 = const()[name = string("op_2575_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2576 = const()[name = string("op_2576"), val = tensor([1, 128, -1])]; tensor var_2575_cast_fp16 = transpose(perm = var_2575_perm_0, x = out_25_cast_fp16)[name = string("transpose_7")]; tensor input_123_cast_fp16 = reshape(shape = var_2576, x = var_2575_cast_fp16)[name = string("input_123_cast_fp16")]; tensor layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195164736))))[name = string("layers_12_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor x_331_cast_fp16 = add(x = x_311_cast_fp16, y = linear_87_cast_fp16)[name = string("x_331_cast_fp16")]; fp16 var_2426_promoted_3_to_fp16 = const()[name = string("op_2426_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2583_cast_fp16 = pow(x = x_331_cast_fp16, y = var_2426_promoted_3_to_fp16)[name = string("op_2583_cast_fp16")]; tensor var_2585_axes_0 = const()[name = string("op_2585_axes_0"), val = tensor([-1])]; bool var_2585_keep_dims_0 = const()[name = string("op_2585_keep_dims_0"), val = bool(true)]; tensor var_2585_cast_fp16 = reduce_mean(axes = var_2585_axes_0, keep_dims = var_2585_keep_dims_0, x = var_2583_cast_fp16)[name = string("op_2585_cast_fp16")]; fp16 var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2587_cast_fp16 = add(x = var_2585_cast_fp16, y = var_2586_to_fp16)[name = string("op_2587_cast_fp16")]; fp32 norm_103_epsilon_0 = const()[name = string("norm_103_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_103_cast_fp16 = rsqrt(epsilon = norm_103_epsilon_0, x = var_2587_cast_fp16)[name = string("norm_103_cast_fp16")]; tensor var_2589_cast_fp16 = mul(x = x_331_cast_fp16, y = norm_103_cast_fp16)[name = string("op_2589_cast_fp16")]; tensor layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195165312)))]; tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = layers_12_post_attention_layernorm_weight_to_fp16)[name = string("op_2590_cast_fp16")]; tensor layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195167424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313216))))[name = string("layers_12_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = var_2590_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2600_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2600_cast_fp16")]; tensor layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201459584))))[name = string("layers_12_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_up_proj_weight_to_fp16_palettized, x = var_2590_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_129_cast_fp16 = mul(x = var_2600_cast_fp16, y = linear_89_cast_fp16)[name = string("input_129_cast_fp16")]; tensor layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201460160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204605952))))[name = string("layers_12_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_90_cast_fp16)[name = string("x_337_cast_fp16")]; int32 var_2621 = const()[name = string("op_2621"), val = int32(-1)]; fp16 var_2620_promoted_to_fp16 = const()[name = string("op_2620_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2630_cast_fp16 = pow(x = x_337_cast_fp16, y = var_2620_promoted_to_fp16)[name = string("op_2630_cast_fp16")]; tensor var_2632_axes_0 = const()[name = string("op_2632_axes_0"), val = tensor([-1])]; bool var_2632_keep_dims_0 = const()[name = string("op_2632_keep_dims_0"), val = bool(true)]; tensor var_2632_cast_fp16 = reduce_mean(axes = var_2632_axes_0, keep_dims = var_2632_keep_dims_0, x = var_2630_cast_fp16)[name = string("op_2632_cast_fp16")]; fp16 var_2633_to_fp16 = const()[name = string("op_2633_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2634_cast_fp16 = add(x = var_2632_cast_fp16, y = var_2633_to_fp16)[name = string("op_2634_cast_fp16")]; fp32 norm_105_epsilon_0 = const()[name = string("norm_105_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_105_cast_fp16 = rsqrt(epsilon = norm_105_epsilon_0, x = var_2634_cast_fp16)[name = string("norm_105_cast_fp16")]; tensor var_2636_cast_fp16 = mul(x = x_337_cast_fp16, y = norm_105_cast_fp16)[name = string("op_2636_cast_fp16")]; tensor layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204606528)))]; tensor var_2637_cast_fp16 = mul(x = var_2636_cast_fp16, y = layers_13_input_layernorm_weight_to_fp16)[name = string("op_2637_cast_fp16")]; tensor layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204608640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206705856))))[name = string("layers_13_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = var_2637_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor var_2653 = const()[name = string("op_2653"), val = tensor([1, 128, 16, 128])]; tensor var_2654_cast_fp16 = reshape(shape = var_2653, x = linear_91_cast_fp16)[name = string("op_2654_cast_fp16")]; tensor x_343_perm_0 = const()[name = string("x_343_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206706432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755072))))[name = string("layers_13_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = var_2637_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor var_2658 = const()[name = string("op_2658"), val = tensor([1, 128, 8, 128])]; tensor var_2659_cast_fp16 = reshape(shape = var_2658, x = linear_92_cast_fp16)[name = string("op_2659_cast_fp16")]; tensor x_347_perm_0 = const()[name = string("x_347_perm_0"), val = tensor([0, 2, 1, 3])]; tensor layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804288))))[name = string("layers_13_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = var_2637_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor var_2663 = const()[name = string("op_2663"), val = tensor([1, 128, 8, 128])]; tensor var_2664_cast_fp16 = reshape(shape = var_2663, x = linear_93_cast_fp16)[name = string("op_2664_cast_fp16")]; tensor transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor([1, 0, 2, 3])]; fp16 var_2620_promoted_1_to_fp16 = const()[name = string("op_2620_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor x_343_cast_fp16 = transpose(perm = x_343_perm_0, x = var_2654_cast_fp16)[name = string("transpose_6")]; tensor var_2668_cast_fp16 = pow(x = x_343_cast_fp16, y = var_2620_promoted_1_to_fp16)[name = string("op_2668_cast_fp16")]; tensor var_2670_axes_0 = const()[name = string("op_2670_axes_0"), val = tensor([-1])]; bool var_2670_keep_dims_0 = const()[name = string("op_2670_keep_dims_0"), val = bool(true)]; tensor var_2670_cast_fp16 = reduce_mean(axes = var_2670_axes_0, keep_dims = var_2670_keep_dims_0, x = var_2668_cast_fp16)[name = string("op_2670_cast_fp16")]; fp16 var_2671_to_fp16 = const()[name = string("op_2671_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2672_cast_fp16 = add(x = var_2670_cast_fp16, y = var_2671_to_fp16)[name = string("op_2672_cast_fp16")]; fp32 norm_107_epsilon_0 = const()[name = string("norm_107_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_107_cast_fp16 = rsqrt(epsilon = norm_107_epsilon_0, x = var_2672_cast_fp16)[name = string("norm_107_cast_fp16")]; tensor var_2674_cast_fp16 = mul(x = x_343_cast_fp16, y = norm_107_cast_fp16)[name = string("op_2674_cast_fp16")]; tensor layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804864)))]; tensor var_2675_cast_fp16 = mul(x = var_2674_cast_fp16, y = layers_13_self_attn_q_norm_weight_to_fp16)[name = string("op_2675_cast_fp16")]; fp16 var_2620_promoted_2_to_fp16 = const()[name = string("op_2620_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor x_347_cast_fp16 = transpose(perm = x_347_perm_0, x = var_2659_cast_fp16)[name = string("transpose_5")]; tensor var_2679_cast_fp16 = pow(x = x_347_cast_fp16, y = var_2620_promoted_2_to_fp16)[name = string("op_2679_cast_fp16")]; tensor var_2681_axes_0 = const()[name = string("op_2681_axes_0"), val = tensor([-1])]; bool var_2681_keep_dims_0 = const()[name = string("op_2681_keep_dims_0"), val = bool(true)]; tensor var_2681_cast_fp16 = reduce_mean(axes = var_2681_axes_0, keep_dims = var_2681_keep_dims_0, x = var_2679_cast_fp16)[name = string("op_2681_cast_fp16")]; fp16 var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2683_cast_fp16 = add(x = var_2681_cast_fp16, y = var_2682_to_fp16)[name = string("op_2683_cast_fp16")]; fp32 norm_109_epsilon_0 = const()[name = string("norm_109_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_109_cast_fp16 = rsqrt(epsilon = norm_109_epsilon_0, x = var_2683_cast_fp16)[name = string("norm_109_cast_fp16")]; tensor var_2685_cast_fp16 = mul(x = x_347_cast_fp16, y = norm_109_cast_fp16)[name = string("op_2685_cast_fp16")]; tensor layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805184)))]; tensor var_2686_cast_fp16 = mul(x = var_2685_cast_fp16, y = layers_13_self_attn_k_norm_weight_to_fp16)[name = string("op_2686_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 128, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_2675_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 128, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_2675_cast_fp16)[name = string("x2_53_cast_fp16")]; tensor var_2707_cast_fp16 = mul(x = x1_53_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2707_cast_fp16")]; tensor var_2708_cast_fp16 = mul(x = x2_53_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2708_cast_fp16")]; tensor var_2709_cast_fp16 = sub(x = var_2707_cast_fp16, y = var_2708_cast_fp16)[name = string("op_2709_cast_fp16")]; tensor var_2710_cast_fp16 = mul(x = x2_53_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2710_cast_fp16")]; tensor var_2711_cast_fp16 = mul(x = x1_53_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2711_cast_fp16")]; tensor var_2712_cast_fp16 = add(x = var_2710_cast_fp16, y = var_2711_cast_fp16)[name = string("op_2712_cast_fp16")]; bool q_interleave_0 = const()[name = string("q_interleave_0"), val = bool(false)]; tensor q_cast_fp16 = concat(axis = var_2621, interleave = q_interleave_0, values = (var_2709_cast_fp16, var_2712_cast_fp16))[name = string("q_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 128, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_2686_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 128, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_2686_cast_fp16)[name = string("x2_cast_fp16")]; tensor var_2734_cast_fp16 = mul(x = x1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2734_cast_fp16")]; tensor var_2735_cast_fp16 = mul(x = x2_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2735_cast_fp16")]; tensor var_2736_cast_fp16 = sub(x = var_2734_cast_fp16, y = var_2735_cast_fp16)[name = string("op_2736_cast_fp16")]; tensor var_2737_cast_fp16 = mul(x = x2_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2737_cast_fp16")]; tensor var_2738_cast_fp16 = mul(x = x1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2738_cast_fp16")]; tensor var_2739_cast_fp16 = add(x = var_2737_cast_fp16, y = var_2738_cast_fp16)[name = string("op_2739_cast_fp16")]; bool var_2741_interleave_0 = const()[name = string("op_2741_interleave_0"), val = bool(false)]; tensor var_2741_cast_fp16 = concat(axis = var_2621, interleave = var_2741_interleave_0, values = (var_2736_cast_fp16, var_2739_cast_fp16))[name = string("op_2741_cast_fp16")]; tensor transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor([2, 0, 1, 3])]; tensor concat_238 = const()[name = string("concat_238"), val = tensor([128, 1024])]; tensor transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = var_2741_cast_fp16)[name = string("transpose_4")]; tensor reshape_79_cast_fp16 = reshape(shape = concat_238, x = transpose_53_cast_fp16)[name = string("reshape_79_cast_fp16")]; bool matmul_26_transpose_x_1 = const()[name = string("matmul_26_transpose_x_1"), val = bool(true)]; bool matmul_26_transpose_y_1 = const()[name = string("matmul_26_transpose_y_1"), val = bool(false)]; tensor matmul_26_cast_fp16 = matmul(transpose_x = matmul_26_transpose_x_1, transpose_y = matmul_26_transpose_y_1, x = var_68_to_fp16, y = reshape_79_cast_fp16)[name = string("matmul_26_cast_fp16")]; tensor concat_241 = const()[name = string("concat_241"), val = tensor([1024, 1, 8, 128])]; tensor reshape_80_cast_fp16 = reshape(shape = concat_241, x = matmul_26_cast_fp16)[name = string("reshape_80_cast_fp16")]; tensor scattered_k_perm_0 = const()[name = string("scattered_k_perm_0"), val = tensor([1, 2, 0, 3])]; tensor concat_246 = const()[name = string("concat_246"), val = tensor([128, 1024])]; tensor transpose_69_cast_fp16 = transpose(perm = transpose_69_perm_0, x = var_2664_cast_fp16)[name = string("transpose_3")]; tensor reshape_82_cast_fp16 = reshape(shape = concat_246, x = transpose_69_cast_fp16)[name = string("reshape_82_cast_fp16")]; bool matmul_27_transpose_x_1 = const()[name = string("matmul_27_transpose_x_1"), val = bool(true)]; bool matmul_27_transpose_y_1 = const()[name = string("matmul_27_transpose_y_1"), val = bool(false)]; tensor matmul_27_cast_fp16 = matmul(transpose_x = matmul_27_transpose_x_1, transpose_y = matmul_27_transpose_y_1, x = var_68_to_fp16, y = reshape_82_cast_fp16)[name = string("matmul_27_cast_fp16")]; tensor concat_249 = const()[name = string("concat_249"), val = tensor([1024, 1, 8, 128])]; tensor reshape_83_cast_fp16 = reshape(shape = concat_249, x = matmul_27_cast_fp16)[name = string("reshape_83_cast_fp16")]; tensor scattered_v_perm_0 = const()[name = string("scattered_v_perm_0"), val = tensor([1, 2, 0, 3])]; tensor read_state_26 = read_state(input = k_cache_13)[name = string("read_state_26")]; tensor k_cache_81_cast_fp16 = mul(x = read_state_26, y = var_224_cast_fp16)[name = string("k_cache_81_cast_fp16")]; write_state(data = k_cache_81_cast_fp16, input = k_cache_13)[name = string("coreml_update_state_108_write_state")]; tensor coreml_update_state_108 = read_state(input = k_cache_13)[name = string("coreml_update_state_108")]; tensor scattered_k_cast_fp16 = transpose(perm = scattered_k_perm_0, x = reshape_80_cast_fp16)[name = string("transpose_2")]; tensor k_cache_cast_fp16 = add(x = coreml_update_state_108, y = scattered_k_cast_fp16)[name = string("k_cache_cast_fp16")]; write_state(data = k_cache_cast_fp16, input = k_cache_13)[name = string("coreml_update_state_109_write_state")]; tensor coreml_update_state_109 = read_state(input = k_cache_13)[name = string("coreml_update_state_109")]; tensor read_state_27 = read_state(input = v_cache_13)[name = string("read_state_27")]; tensor v_cache_81_cast_fp16 = mul(x = read_state_27, y = var_224_cast_fp16)[name = string("v_cache_81_cast_fp16")]; write_state(data = v_cache_81_cast_fp16, input = v_cache_13)[name = string("coreml_update_state_110_write_state")]; tensor coreml_update_state_110 = read_state(input = v_cache_13)[name = string("coreml_update_state_110")]; tensor scattered_v_cast_fp16 = transpose(perm = scattered_v_perm_0, x = reshape_83_cast_fp16)[name = string("transpose_1")]; tensor v_cache_cast_fp16 = add(x = coreml_update_state_110, y = scattered_v_cast_fp16)[name = string("v_cache_cast_fp16")]; write_state(data = v_cache_cast_fp16, input = v_cache_13)[name = string("coreml_update_state_111_write_state")]; tensor coreml_update_state_111 = read_state(input = v_cache_13)[name = string("coreml_update_state_111")]; tensor var_2752_axes_0 = const()[name = string("op_2752_axes_0"), val = tensor([2])]; tensor var_2752_cast_fp16 = expand_dims(axes = var_2752_axes_0, x = coreml_update_state_109)[name = string("op_2752_cast_fp16")]; tensor k_exp_53_reps_0 = const()[name = string("k_exp_53_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor k_exp_53_cast_fp16 = tile(reps = k_exp_53_reps_0, x = var_2752_cast_fp16)[name = string("k_exp_53_cast_fp16")]; tensor var_2755 = const()[name = string("op_2755"), val = tensor([1, 16, 1024, 128])]; tensor k_exp_cast_fp16 = reshape(shape = var_2755, x = k_exp_53_cast_fp16)[name = string("k_exp_cast_fp16")]; tensor var_2757_axes_0 = const()[name = string("op_2757_axes_0"), val = tensor([2])]; tensor var_2757_cast_fp16 = expand_dims(axes = var_2757_axes_0, x = coreml_update_state_111)[name = string("op_2757_cast_fp16")]; tensor v_exp_53_reps_0 = const()[name = string("v_exp_53_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor v_exp_53_cast_fp16 = tile(reps = v_exp_53_reps_0, x = var_2757_cast_fp16)[name = string("v_exp_53_cast_fp16")]; tensor var_2760 = const()[name = string("op_2760"), val = tensor([1, 16, 1024, 128])]; tensor v_exp_cast_fp16 = reshape(shape = var_2760, x = v_exp_53_cast_fp16)[name = string("v_exp_cast_fp16")]; bool var_2763_transpose_x_1 = const()[name = string("op_2763_transpose_x_1"), val = bool(false)]; bool var_2763_transpose_y_1 = const()[name = string("op_2763_transpose_y_1"), val = bool(true)]; tensor var_2763_cast_fp16 = matmul(transpose_x = var_2763_transpose_x_1, transpose_y = var_2763_transpose_y_1, x = q_cast_fp16, y = k_exp_cast_fp16)[name = string("op_2763_cast_fp16")]; fp16 var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = fp16(0x1.6ap-4)]; tensor attn_53_cast_fp16 = mul(x = var_2763_cast_fp16, y = var_2764_to_fp16)[name = string("attn_53_cast_fp16")]; tensor input_131_cast_fp16 = add(x = attn_53_cast_fp16, y = attention_mask_to_fp16)[name = string("input_131_cast_fp16")]; tensor attn_cast_fp16 = softmax(axis = var_2621, x = input_131_cast_fp16)[name = string("attn_cast_fp16")]; bool out_transpose_x_0 = const()[name = string("out_transpose_x_0"), val = bool(false)]; bool out_transpose_y_0 = const()[name = string("out_transpose_y_0"), val = bool(false)]; tensor out_cast_fp16 = matmul(transpose_x = out_transpose_x_0, transpose_y = out_transpose_y_0, x = attn_cast_fp16, y = v_exp_cast_fp16)[name = string("out_cast_fp16")]; tensor var_2769_perm_0 = const()[name = string("op_2769_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2770 = const()[name = string("op_2770"), val = tensor([1, 128, -1])]; tensor var_2769_cast_fp16 = transpose(perm = var_2769_perm_0, x = out_cast_fp16)[name = string("transpose_0")]; tensor input_133_cast_fp16 = reshape(shape = var_2770, x = var_2769_cast_fp16)[name = string("input_133_cast_fp16")]; tensor layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210902720))))[name = string("layers_13_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor x_357_cast_fp16 = add(x = x_337_cast_fp16, y = linear_94_cast_fp16)[name = string("x_357_cast_fp16")]; fp16 var_2620_promoted_3_to_fp16 = const()[name = string("op_2620_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_2777_cast_fp16 = pow(x = x_357_cast_fp16, y = var_2620_promoted_3_to_fp16)[name = string("op_2777_cast_fp16")]; tensor var_2779_axes_0 = const()[name = string("op_2779_axes_0"), val = tensor([-1])]; bool var_2779_keep_dims_0 = const()[name = string("op_2779_keep_dims_0"), val = bool(true)]; tensor var_2779_cast_fp16 = reduce_mean(axes = var_2779_axes_0, keep_dims = var_2779_keep_dims_0, x = var_2777_cast_fp16)[name = string("op_2779_cast_fp16")]; fp16 var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2781_cast_fp16 = add(x = var_2779_cast_fp16, y = var_2780_to_fp16)[name = string("op_2781_cast_fp16")]; fp32 norm_111_epsilon_0 = const()[name = string("norm_111_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_111_cast_fp16 = rsqrt(epsilon = norm_111_epsilon_0, x = var_2781_cast_fp16)[name = string("norm_111_cast_fp16")]; tensor var_2783_cast_fp16 = mul(x = x_357_cast_fp16, y = norm_111_cast_fp16)[name = string("op_2783_cast_fp16")]; tensor layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210903296)))]; tensor var_2784_cast_fp16 = mul(x = var_2783_cast_fp16, y = layers_13_post_attention_layernorm_weight_to_fp16)[name = string("op_2784_cast_fp16")]; tensor layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210905408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051200))))[name = string("layers_13_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = var_2784_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_2794_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2794_cast_fp16")]; tensor layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217197568))))[name = string("layers_13_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_up_proj_weight_to_fp16_palettized, x = var_2784_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_139_cast_fp16 = mul(x = var_2794_cast_fp16, y = linear_96_cast_fp16)[name = string("input_139_cast_fp16")]; tensor layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217198144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220343936))))[name = string("layers_13_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_97_cast_fp16)[name = string("x_363_cast_fp16")]; tensor var_2810_begin_0 = const()[name = string("op_2810_begin_0"), val = tensor([0, -1, 0])]; tensor var_2810_end_0 = const()[name = string("op_2810_end_0"), val = tensor([1, 128, 1024])]; tensor var_2810_end_mask_0 = const()[name = string("op_2810_end_mask_0"), val = tensor([true, true, true])]; tensor var_2810_cast_fp16 = slice_by_index(begin = var_2810_begin_0, end = var_2810_end_0, end_mask = var_2810_end_mask_0, x = x_363_cast_fp16)[name = string("op_2810_cast_fp16")]; fp16 var_2819_promoted_to_fp16 = const()[name = string("op_2819_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2825_cast_fp16 = pow(x = var_2810_cast_fp16, y = var_2819_promoted_to_fp16)[name = string("op_2825_cast_fp16")]; tensor var_2827_axes_0 = const()[name = string("op_2827_axes_0"), val = tensor([-1])]; bool var_2827_keep_dims_0 = const()[name = string("op_2827_keep_dims_0"), val = bool(true)]; tensor var_2827_cast_fp16 = reduce_mean(axes = var_2827_axes_0, keep_dims = var_2827_keep_dims_0, x = var_2825_cast_fp16)[name = string("op_2827_cast_fp16")]; fp16 var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2829_cast_fp16 = add(x = var_2827_cast_fp16, y = var_2828_to_fp16)[name = string("op_2829_cast_fp16")]; fp32 norm_113_epsilon_0 = const()[name = string("norm_113_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor norm_113_cast_fp16 = rsqrt(epsilon = norm_113_epsilon_0, x = var_2829_cast_fp16)[name = string("norm_113_cast_fp16")]; tensor var_2831_cast_fp16 = mul(x = var_2810_cast_fp16, y = norm_113_cast_fp16)[name = string("op_2831_cast_fp16")]; tensor norm_weight_to_fp16 = const()[name = string("norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220344512)))]; tensor var_2832_cast_fp16 = mul(x = var_2831_cast_fp16, y = norm_weight_to_fp16)[name = string("op_2832_cast_fp16")]; tensor lm_head_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220346624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375929152))))[name = string("lm_head_weight_to_fp16_palettized")]; tensor linear_98_bias_0_to_fp16 = const()[name = string("linear_98_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375929728)))]; tensor logits = linear(bias = linear_98_bias_0_to_fp16, weight = lm_head_weight_to_fp16_palettized, x = var_2832_cast_fp16)[name = string("linear_98_cast_fp16")]; } -> (logits); }