busyfarm-org's picture
Add files using upload-large-folder tool
aaa80bb verified
Raw
History Blame Contribute Delete
395 kB
program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})]
{
func main<ios18>(tensor<fp32, [1, 1, 128, 1024]> attention_mask, tensor<fp32, [1, 128, 1024]> input_embeds, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_0, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_1, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_10, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_11, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_12, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_13, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_2, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_3, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_4, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_5, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_6, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_7, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_8, state<tensor<fp16, [1, 8, 1024, 128]>> k_cache_9, tensor<int32, [128]> positions, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_0, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_1, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_10, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_11, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_12, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_13, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_2, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_3, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_4, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_5, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_6, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_7, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_8, state<tensor<fp16, [1, 8, 1024, 128]>> v_cache_9) {
int32 var_68_one_hot_vector_size_0 = const()[name = string("op_68_one_hot_vector_size_0"), val = int32(1024)];
int32 var_68_axis_0 = const()[name = string("op_68_axis_0"), val = int32(-1)];
int32 var_68_on_value_0 = const()[name = string("op_68_on_value_0"), val = int32(1)];
int32 var_68_off_value_0 = const()[name = string("op_68_off_value_0"), val = int32(0)];
tensor<int32, [128, 1024]> var_68 = one_hot(axis = var_68_axis_0, indices = positions, off_value = var_68_off_value_0, on_value = var_68_on_value_0, one_hot_vector_size = var_68_one_hot_vector_size_0)[name = string("op_68")];
tensor<int32, [1]> var_78_axes_0 = const()[name = string("op_78_axes_0"), val = tensor<int32, [1]>([0])];
bool var_78_keep_dims_0 = const()[name = string("op_78_keep_dims_0"), val = bool(false)];
string cast_1_to_fp16_dtype_0 = const()[name = string("cast_1_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [128, 1024]> var_68_to_fp16 = cast(dtype = cast_1_to_fp16_dtype_0, x = var_68)[name = string("cast_3")];
tensor<fp16, [1024]> var_78_cast_fp16 = reduce_sum(axes = var_78_axes_0, keep_dims = var_78_keep_dims_0, x = var_68_to_fp16)[name = string("op_78_cast_fp16")];
tensor<int32, [4]> var_83 = const()[name = string("op_83"), val = tensor<int32, [4]>([1, 1, 1024, 1])];
tensor<fp16, [1, 1, 1024, 1]> var_84_cast_fp16 = reshape(shape = var_83, x = var_78_cast_fp16)[name = string("op_84_cast_fp16")];
int32 var_99 = const()[name = string("op_99"), val = int32(-1)];
string input_embeds_to_fp16_dtype_0 = const()[name = string("input_embeds_to_fp16_dtype_0"), val = string("fp16")];
fp16 var_98_promoted_to_fp16 = const()[name = string("op_98_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> input_embeds_to_fp16 = cast(dtype = input_embeds_to_fp16_dtype_0, x = input_embeds)[name = string("cast_2")];
tensor<fp16, [1, 128, 1024]> var_108_cast_fp16 = pow(x = input_embeds_to_fp16, y = var_98_promoted_to_fp16)[name = string("op_108_cast_fp16")];
tensor<int32, [1]> var_110_axes_0 = const()[name = string("op_110_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_110_keep_dims_0 = const()[name = string("op_110_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_110_cast_fp16 = reduce_mean(axes = var_110_axes_0, keep_dims = var_110_keep_dims_0, x = var_108_cast_fp16)[name = string("op_110_cast_fp16")];
fp16 var_111_to_fp16 = const()[name = string("op_111_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_112_cast_fp16 = add(x = var_110_cast_fp16, y = var_111_to_fp16)[name = string("op_112_cast_fp16")];
fp32 norm_1_epsilon_0 = const()[name = string("norm_1_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_1_cast_fp16 = rsqrt(epsilon = norm_1_epsilon_0, x = var_112_cast_fp16)[name = string("norm_1_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_114_cast_fp16 = mul(x = input_embeds_to_fp16, y = norm_1_cast_fp16)[name = string("op_114_cast_fp16")];
tensor<fp16, [1024]> layers_0_input_layernorm_weight_to_fp16 = const()[name = string("layers_0_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
tensor<fp16, [1, 128, 1024]> var_115_cast_fp16 = mul(x = var_114_cast_fp16, y = layers_0_input_layernorm_weight_to_fp16)[name = string("op_115_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2176))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099392))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [2048]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099968)))];
tensor<fp16, [1, 128, 2048]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = var_115_cast_fp16)[name = string("linear_0_cast_fp16")];
tensor<int32, [4]> var_131 = const()[name = string("op_131"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_132_cast_fp16 = reshape(shape = var_131, x = linear_0_cast_fp16)[name = string("op_132_cast_fp16")];
tensor<int32, [4]> x_5_perm_0 = const()[name = string("x_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2104128))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3152768))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1024]> linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3153344)))];
tensor<fp16, [1, 128, 1024]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = var_115_cast_fp16)[name = string("linear_1_cast_fp16")];
tensor<int32, [4]> var_136 = const()[name = string("op_136"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_137_cast_fp16 = reshape(shape = var_136, x = linear_1_cast_fp16)[name = string("op_137_cast_fp16")];
tensor<int32, [4]> x_9_perm_0 = const()[name = string("x_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3155456))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204096))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = var_115_cast_fp16)[name = string("linear_2_cast_fp16")];
tensor<int32, [4]> var_141 = const()[name = string("op_141"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_142_cast_fp16 = reshape(shape = var_141, x = linear_2_cast_fp16)[name = string("op_142_cast_fp16")];
tensor<int32, [4]> transpose_56_perm_0 = const()[name = string("transpose_56_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_98_promoted_1_to_fp16 = const()[name = string("op_98_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_5_cast_fp16 = transpose(perm = x_5_perm_0, x = var_132_cast_fp16)[name = string("transpose_97")];
tensor<fp16, [1, 16, 128, 128]> var_146_cast_fp16 = pow(x = x_5_cast_fp16, y = var_98_promoted_1_to_fp16)[name = string("op_146_cast_fp16")];
tensor<int32, [1]> var_148_axes_0 = const()[name = string("op_148_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_148_keep_dims_0 = const()[name = string("op_148_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_148_cast_fp16 = reduce_mean(axes = var_148_axes_0, keep_dims = var_148_keep_dims_0, x = var_146_cast_fp16)[name = string("op_148_cast_fp16")];
fp16 var_149_to_fp16 = const()[name = string("op_149_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_150_cast_fp16 = add(x = var_148_cast_fp16, y = var_149_to_fp16)[name = string("op_150_cast_fp16")];
fp32 norm_3_epsilon_0 = const()[name = string("norm_3_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_3_cast_fp16 = rsqrt(epsilon = norm_3_epsilon_0, x = var_150_cast_fp16)[name = string("norm_3_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_152_cast_fp16 = mul(x = x_5_cast_fp16, y = norm_3_cast_fp16)[name = string("op_152_cast_fp16")];
tensor<fp16, [128]> layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204672)))];
tensor<fp16, [1, 16, 128, 128]> var_153_cast_fp16 = mul(x = var_152_cast_fp16, y = layers_0_self_attn_q_norm_weight_to_fp16)[name = string("op_153_cast_fp16")];
fp16 var_98_promoted_2_to_fp16 = const()[name = string("op_98_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_9_cast_fp16 = transpose(perm = x_9_perm_0, x = var_137_cast_fp16)[name = string("transpose_96")];
tensor<fp16, [1, 8, 128, 128]> var_157_cast_fp16 = pow(x = x_9_cast_fp16, y = var_98_promoted_2_to_fp16)[name = string("op_157_cast_fp16")];
tensor<int32, [1]> var_159_axes_0 = const()[name = string("op_159_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_159_keep_dims_0 = const()[name = string("op_159_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_159_cast_fp16 = reduce_mean(axes = var_159_axes_0, keep_dims = var_159_keep_dims_0, x = var_157_cast_fp16)[name = string("op_159_cast_fp16")];
fp16 var_160_to_fp16 = const()[name = string("op_160_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_161_cast_fp16 = add(x = var_159_cast_fp16, y = var_160_to_fp16)[name = string("op_161_cast_fp16")];
fp32 norm_5_epsilon_0 = const()[name = string("norm_5_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_5_cast_fp16 = rsqrt(epsilon = norm_5_epsilon_0, x = var_161_cast_fp16)[name = string("norm_5_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_163_cast_fp16 = mul(x = x_9_cast_fp16, y = norm_5_cast_fp16)[name = string("op_163_cast_fp16")];
tensor<fp16, [128]> layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204992)))];
tensor<fp16, [1, 8, 128, 128]> var_164_cast_fp16 = mul(x = var_163_cast_fp16, y = layers_0_self_attn_k_norm_weight_to_fp16)[name = string("op_164_cast_fp16")];
tensor<int32, [1]> var_168_axes_0 = const()[name = string("op_168_axes_0"), val = tensor<int32, [1]>([-1])];
string cast_12_to_fp16_dtype_0 = const()[name = string("cast_12_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [128]> positions_to_fp16 = cast(dtype = cast_12_to_fp16_dtype_0, x = positions)[name = string("cast_1")];
tensor<fp16, [128, 1]> var_168_cast_fp16 = expand_dims(axes = var_168_axes_0, x = positions_to_fp16)[name = string("op_168_cast_fp16")];
tensor<fp16, [64]> layers_0_self_attn_rope_inv_freq_to_fp16 = const()[name = string("layers_0_self_attn_rope_inv_freq_to_fp16"), val = tensor<fp16, [64]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205312)))];
tensor<fp16, [128, 64]> freqs_1_cast_fp16 = mul(x = var_168_cast_fp16, y = layers_0_self_attn_rope_inv_freq_to_fp16)[name = string("freqs_1_cast_fp16")];
tensor<fp16, [128, 64]> var_170_cast_fp16 = cos(x = freqs_1_cast_fp16)[name = string("op_170_cast_fp16")];
tensor<int32, [4]> var_172 = const()[name = string("op_172"), val = tensor<int32, [4]>([1, 1, -1, 64])];
tensor<fp16, [1, 1, 128, 64]> cos_val_1_cast_fp16 = reshape(shape = var_172, x = var_170_cast_fp16)[name = string("cos_val_1_cast_fp16")];
tensor<fp16, [128, 64]> var_174_cast_fp16 = sin(x = freqs_1_cast_fp16)[name = string("op_174_cast_fp16")];
tensor<int32, [4]> var_176 = const()[name = string("op_176"), val = tensor<int32, [4]>([1, 1, -1, 64])];
tensor<fp16, [1, 1, 128, 64]> sin_val_1_cast_fp16 = reshape(shape = var_176, x = var_174_cast_fp16)[name = string("sin_val_1_cast_fp16")];
tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_153_cast_fp16)[name = string("x1_1_cast_fp16")];
tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_153_cast_fp16)[name = string("x2_1_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_185_cast_fp16 = mul(x = x1_1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_185_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_186_cast_fp16 = mul(x = x2_1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_186_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_187_cast_fp16 = sub(x = var_185_cast_fp16, y = var_186_cast_fp16)[name = string("op_187_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_188_cast_fp16 = mul(x = x2_1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_188_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_189_cast_fp16 = mul(x = x1_1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_189_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_190_cast_fp16 = add(x = var_188_cast_fp16, y = var_189_cast_fp16)[name = string("op_190_cast_fp16")];
bool q_1_interleave_0 = const()[name = string("q_1_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_1_cast_fp16 = concat(axis = var_99, interleave = q_1_interleave_0, values = (var_187_cast_fp16, var_190_cast_fp16))[name = string("q_1_cast_fp16")];
tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_164_cast_fp16)[name = string("x1_3_cast_fp16")];
tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_164_cast_fp16)[name = string("x2_3_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_212_cast_fp16 = mul(x = x1_3_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_212_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_213_cast_fp16 = mul(x = x2_3_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_213_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_214_cast_fp16 = sub(x = var_212_cast_fp16, y = var_213_cast_fp16)[name = string("op_214_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_215_cast_fp16 = mul(x = x2_3_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_215_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_216_cast_fp16 = mul(x = x1_3_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_216_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_217_cast_fp16 = add(x = var_215_cast_fp16, y = var_216_cast_fp16)[name = string("op_217_cast_fp16")];
bool var_219_interleave_0 = const()[name = string("op_219_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_219_cast_fp16 = concat(axis = var_99, interleave = var_219_interleave_0, values = (var_214_cast_fp16, var_217_cast_fp16))[name = string("op_219_cast_fp16")];
tensor<int32, [4]> transpose_1_perm_0 = const()[name = string("transpose_1_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_4 = const()[name = string("concat_4"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = var_219_cast_fp16)[name = string("transpose_95")];
tensor<fp16, [128, 1024]> reshape_1_cast_fp16 = reshape(shape = concat_4, x = transpose_1_cast_fp16)[name = string("reshape_1_cast_fp16")];
bool matmul_0_transpose_x_1 = const()[name = string("matmul_0_transpose_x_1"), val = bool(true)];
bool matmul_0_transpose_y_1 = const()[name = string("matmul_0_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_1, transpose_y = matmul_0_transpose_y_1, x = var_68_to_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")];
tensor<int32, [4]> concat_7 = const()[name = string("concat_7"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_2_cast_fp16 = reshape(shape = concat_7, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")];
tensor<int32, [4]> scattered_k_1_perm_0 = const()[name = string("scattered_k_1_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_56_cast_fp16 = transpose(perm = transpose_56_perm_0, x = var_142_cast_fp16)[name = string("transpose_94")];
tensor<fp16, [128, 1024]> reshape_4_cast_fp16 = reshape(shape = concat_12, x = transpose_56_cast_fp16)[name = string("reshape_4_cast_fp16")];
bool matmul_1_transpose_x_1 = const()[name = string("matmul_1_transpose_x_1"), val = bool(true)];
bool matmul_1_transpose_y_1 = const()[name = string("matmul_1_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = var_68_to_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")];
tensor<int32, [4]> concat_15 = const()[name = string("concat_15"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_5_cast_fp16 = reshape(shape = concat_15, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")];
tensor<int32, [4]> scattered_v_1_perm_0 = const()[name = string("scattered_v_1_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
fp16 var_101_promoted_to_fp16 = const()[name = string("op_101_promoted_to_fp16"), val = fp16(0x1p+0)];
tensor<fp16, [1, 1, 1024, 1]> var_224_cast_fp16 = sub(x = var_101_promoted_to_fp16, y = var_84_cast_fp16)[name = string("op_224_cast_fp16")];
tensor<fp16, [1, 8, 1024, 128]> read_state_0 = read_state(input = k_cache_0)[name = string("read_state_0")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_3_cast_fp16 = mul(x = read_state_0, y = var_224_cast_fp16)[name = string("k_cache_3_cast_fp16")];
write_state(data = k_cache_3_cast_fp16, input = k_cache_0)[name = string("coreml_update_state_56_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_56 = read_state(input = k_cache_0)[name = string("coreml_update_state_56")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_1_cast_fp16 = transpose(perm = scattered_k_1_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_93")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_5_cast_fp16 = add(x = coreml_update_state_56, y = scattered_k_1_cast_fp16)[name = string("k_cache_5_cast_fp16")];
write_state(data = k_cache_5_cast_fp16, input = k_cache_0)[name = string("coreml_update_state_57_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_57 = read_state(input = k_cache_0)[name = string("coreml_update_state_57")];
tensor<fp16, [1, 8, 1024, 128]> read_state_1 = read_state(input = v_cache_0)[name = string("read_state_1")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_3_cast_fp16 = mul(x = read_state_1, y = var_224_cast_fp16)[name = string("v_cache_3_cast_fp16")];
write_state(data = v_cache_3_cast_fp16, input = v_cache_0)[name = string("coreml_update_state_58_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_58 = read_state(input = v_cache_0)[name = string("coreml_update_state_58")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_1_cast_fp16 = transpose(perm = scattered_v_1_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_92")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_5_cast_fp16 = add(x = coreml_update_state_58, y = scattered_v_1_cast_fp16)[name = string("v_cache_5_cast_fp16")];
write_state(data = v_cache_5_cast_fp16, input = v_cache_0)[name = string("coreml_update_state_59_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_59 = read_state(input = v_cache_0)[name = string("coreml_update_state_59")];
tensor<int32, [1]> var_230_axes_0 = const()[name = string("op_230_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_230_cast_fp16 = expand_dims(axes = var_230_axes_0, x = coreml_update_state_57)[name = string("op_230_cast_fp16")];
tensor<int32, [5]> k_exp_1_reps_0 = const()[name = string("k_exp_1_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_1_cast_fp16 = tile(reps = k_exp_1_reps_0, x = var_230_cast_fp16)[name = string("k_exp_1_cast_fp16")];
tensor<int32, [4]> var_233 = const()[name = string("op_233"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_3_cast_fp16 = reshape(shape = var_233, x = k_exp_1_cast_fp16)[name = string("k_exp_3_cast_fp16")];
tensor<int32, [1]> var_235_axes_0 = const()[name = string("op_235_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_235_cast_fp16 = expand_dims(axes = var_235_axes_0, x = coreml_update_state_59)[name = string("op_235_cast_fp16")];
tensor<int32, [5]> v_exp_1_reps_0 = const()[name = string("v_exp_1_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_1_cast_fp16 = tile(reps = v_exp_1_reps_0, x = var_235_cast_fp16)[name = string("v_exp_1_cast_fp16")];
tensor<int32, [4]> var_238 = const()[name = string("op_238"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_3_cast_fp16 = reshape(shape = var_238, x = v_exp_1_cast_fp16)[name = string("v_exp_3_cast_fp16")];
bool var_241_transpose_x_1 = const()[name = string("op_241_transpose_x_1"), val = bool(false)];
bool var_241_transpose_y_1 = const()[name = string("op_241_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_241_cast_fp16 = matmul(transpose_x = var_241_transpose_x_1, transpose_y = var_241_transpose_y_1, x = q_1_cast_fp16, y = k_exp_3_cast_fp16)[name = string("op_241_cast_fp16")];
fp16 var_242_to_fp16 = const()[name = string("op_242_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_1_cast_fp16 = mul(x = var_241_cast_fp16, y = var_242_to_fp16)[name = string("attn_1_cast_fp16")];
string attention_mask_to_fp16_dtype_0 = const()[name = string("attention_mask_to_fp16_dtype_0"), val = string("fp16")];
tensor<fp16, [1, 1, 128, 1024]> attention_mask_to_fp16 = cast(dtype = attention_mask_to_fp16_dtype_0, x = attention_mask)[name = string("cast_0")];
tensor<fp16, [1, 16, 128, 1024]> input_1_cast_fp16 = add(x = attn_1_cast_fp16, y = attention_mask_to_fp16)[name = string("input_1_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_3_cast_fp16 = softmax(axis = var_99, x = input_1_cast_fp16)[name = string("attn_3_cast_fp16")];
bool out_1_transpose_x_0 = const()[name = string("out_1_transpose_x_0"), val = bool(false)];
bool out_1_transpose_y_0 = const()[name = string("out_1_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_1_cast_fp16 = matmul(transpose_x = out_1_transpose_x_0, transpose_y = out_1_transpose_y_0, x = attn_3_cast_fp16, y = v_exp_3_cast_fp16)[name = string("out_1_cast_fp16")];
tensor<int32, [4]> var_247_perm_0 = const()[name = string("op_247_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_248 = const()[name = string("op_248"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_247_cast_fp16 = transpose(perm = var_247_perm_0, x = out_1_cast_fp16)[name = string("transpose_91")];
tensor<fp16, [1, 128, 2048]> input_3_cast_fp16 = reshape(shape = var_248, x = var_247_cast_fp16)[name = string("input_3_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205504))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6302720))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("linear_3_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_19_cast_fp16 = add(x = input_embeds_to_fp16, y = linear_3_cast_fp16)[name = string("x_19_cast_fp16")];
fp16 var_98_promoted_3_to_fp16 = const()[name = string("op_98_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_255_cast_fp16 = pow(x = x_19_cast_fp16, y = var_98_promoted_3_to_fp16)[name = string("op_255_cast_fp16")];
tensor<int32, [1]> var_257_axes_0 = const()[name = string("op_257_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_257_keep_dims_0 = const()[name = string("op_257_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_257_cast_fp16 = reduce_mean(axes = var_257_axes_0, keep_dims = var_257_keep_dims_0, x = var_255_cast_fp16)[name = string("op_257_cast_fp16")];
fp16 var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_259_cast_fp16 = add(x = var_257_cast_fp16, y = var_258_to_fp16)[name = string("op_259_cast_fp16")];
fp32 norm_7_epsilon_0 = const()[name = string("norm_7_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_7_cast_fp16 = rsqrt(epsilon = norm_7_epsilon_0, x = var_259_cast_fp16)[name = string("norm_7_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_261_cast_fp16 = mul(x = x_19_cast_fp16, y = norm_7_cast_fp16)[name = string("op_261_cast_fp16")];
tensor<fp16, [1024]> layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6303296)))];
tensor<fp16, [1, 128, 1024]> var_262_cast_fp16 = mul(x = var_261_cast_fp16, y = layers_0_post_attention_layernorm_weight_to_fp16)[name = string("op_262_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6305408))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451200))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [3072]> linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451776)))];
tensor<fp16, [1, 128, 3072]> linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = var_262_cast_fp16)[name = string("linear_4_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_272_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_272_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9457984))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12603776))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = var_262_cast_fp16)[name = string("linear_5_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_9_cast_fp16 = mul(x = var_272_cast_fp16, y = linear_5_cast_fp16)[name = string("input_9_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12604352))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750144))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("linear_6_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_6_cast_fp16)[name = string("x_25_cast_fp16")];
int32 var_293 = const()[name = string("op_293"), val = int32(-1)];
fp16 var_292_promoted_to_fp16 = const()[name = string("op_292_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_302_cast_fp16 = pow(x = x_25_cast_fp16, y = var_292_promoted_to_fp16)[name = string("op_302_cast_fp16")];
tensor<int32, [1]> var_304_axes_0 = const()[name = string("op_304_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_304_keep_dims_0 = const()[name = string("op_304_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_304_cast_fp16 = reduce_mean(axes = var_304_axes_0, keep_dims = var_304_keep_dims_0, x = var_302_cast_fp16)[name = string("op_304_cast_fp16")];
fp16 var_305_to_fp16 = const()[name = string("op_305_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_306_cast_fp16 = add(x = var_304_cast_fp16, y = var_305_to_fp16)[name = string("op_306_cast_fp16")];
fp32 norm_9_epsilon_0 = const()[name = string("norm_9_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_9_cast_fp16 = rsqrt(epsilon = norm_9_epsilon_0, x = var_306_cast_fp16)[name = string("norm_9_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_308_cast_fp16 = mul(x = x_25_cast_fp16, y = norm_9_cast_fp16)[name = string("op_308_cast_fp16")];
tensor<fp16, [1024]> layers_1_input_layernorm_weight_to_fp16 = const()[name = string("layers_1_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750720)))];
tensor<fp16, [1, 128, 1024]> var_309_cast_fp16 = mul(x = var_308_cast_fp16, y = layers_1_input_layernorm_weight_to_fp16)[name = string("op_309_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752832))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850048))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = var_309_cast_fp16)[name = string("linear_7_cast_fp16")];
tensor<int32, [4]> var_325 = const()[name = string("op_325"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_326_cast_fp16 = reshape(shape = var_325, x = linear_7_cast_fp16)[name = string("op_326_cast_fp16")];
tensor<int32, [4]> x_31_perm_0 = const()[name = string("x_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850624))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899264))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = var_309_cast_fp16)[name = string("linear_8_cast_fp16")];
tensor<int32, [4]> var_330 = const()[name = string("op_330"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_331_cast_fp16 = reshape(shape = var_330, x = linear_8_cast_fp16)[name = string("op_331_cast_fp16")];
tensor<int32, [4]> x_35_perm_0 = const()[name = string("x_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899840))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19948480))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = var_309_cast_fp16)[name = string("linear_9_cast_fp16")];
tensor<int32, [4]> var_335 = const()[name = string("op_335"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_336_cast_fp16 = reshape(shape = var_335, x = linear_9_cast_fp16)[name = string("op_336_cast_fp16")];
tensor<int32, [4]> transpose_57_perm_0 = const()[name = string("transpose_57_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_292_promoted_1_to_fp16 = const()[name = string("op_292_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_31_cast_fp16 = transpose(perm = x_31_perm_0, x = var_326_cast_fp16)[name = string("transpose_90")];
tensor<fp16, [1, 16, 128, 128]> var_340_cast_fp16 = pow(x = x_31_cast_fp16, y = var_292_promoted_1_to_fp16)[name = string("op_340_cast_fp16")];
tensor<int32, [1]> var_342_axes_0 = const()[name = string("op_342_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_342_keep_dims_0 = const()[name = string("op_342_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_342_cast_fp16 = reduce_mean(axes = var_342_axes_0, keep_dims = var_342_keep_dims_0, x = var_340_cast_fp16)[name = string("op_342_cast_fp16")];
fp16 var_343_to_fp16 = const()[name = string("op_343_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_344_cast_fp16 = add(x = var_342_cast_fp16, y = var_343_to_fp16)[name = string("op_344_cast_fp16")];
fp32 norm_11_epsilon_0 = const()[name = string("norm_11_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_11_cast_fp16 = rsqrt(epsilon = norm_11_epsilon_0, x = var_344_cast_fp16)[name = string("norm_11_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_346_cast_fp16 = mul(x = x_31_cast_fp16, y = norm_11_cast_fp16)[name = string("op_346_cast_fp16")];
tensor<fp16, [128]> layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949056)))];
tensor<fp16, [1, 16, 128, 128]> var_347_cast_fp16 = mul(x = var_346_cast_fp16, y = layers_1_self_attn_q_norm_weight_to_fp16)[name = string("op_347_cast_fp16")];
fp16 var_292_promoted_2_to_fp16 = const()[name = string("op_292_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_35_cast_fp16 = transpose(perm = x_35_perm_0, x = var_331_cast_fp16)[name = string("transpose_89")];
tensor<fp16, [1, 8, 128, 128]> var_351_cast_fp16 = pow(x = x_35_cast_fp16, y = var_292_promoted_2_to_fp16)[name = string("op_351_cast_fp16")];
tensor<int32, [1]> var_353_axes_0 = const()[name = string("op_353_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_353_keep_dims_0 = const()[name = string("op_353_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_353_cast_fp16 = reduce_mean(axes = var_353_axes_0, keep_dims = var_353_keep_dims_0, x = var_351_cast_fp16)[name = string("op_353_cast_fp16")];
fp16 var_354_to_fp16 = const()[name = string("op_354_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_355_cast_fp16 = add(x = var_353_cast_fp16, y = var_354_to_fp16)[name = string("op_355_cast_fp16")];
fp32 norm_13_epsilon_0 = const()[name = string("norm_13_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_13_cast_fp16 = rsqrt(epsilon = norm_13_epsilon_0, x = var_355_cast_fp16)[name = string("norm_13_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_357_cast_fp16 = mul(x = x_35_cast_fp16, y = norm_13_cast_fp16)[name = string("op_357_cast_fp16")];
tensor<fp16, [128]> layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949376)))];
tensor<fp16, [1, 8, 128, 128]> var_358_cast_fp16 = mul(x = var_357_cast_fp16, y = layers_1_self_attn_k_norm_weight_to_fp16)[name = string("op_358_cast_fp16")];
tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_347_cast_fp16)[name = string("x1_5_cast_fp16")];
tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_347_cast_fp16)[name = string("x2_5_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_379_cast_fp16 = mul(x = x1_5_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_379_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_380_cast_fp16 = mul(x = x2_5_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_380_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_381_cast_fp16 = sub(x = var_379_cast_fp16, y = var_380_cast_fp16)[name = string("op_381_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_382_cast_fp16 = mul(x = x2_5_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_382_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_383_cast_fp16 = mul(x = x1_5_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_383_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_384_cast_fp16 = add(x = var_382_cast_fp16, y = var_383_cast_fp16)[name = string("op_384_cast_fp16")];
bool q_3_interleave_0 = const()[name = string("q_3_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_3_cast_fp16 = concat(axis = var_293, interleave = q_3_interleave_0, values = (var_381_cast_fp16, var_384_cast_fp16))[name = string("q_3_cast_fp16")];
tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_358_cast_fp16)[name = string("x1_7_cast_fp16")];
tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_358_cast_fp16)[name = string("x2_7_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_406_cast_fp16 = mul(x = x1_7_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_406_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_407_cast_fp16 = mul(x = x2_7_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_407_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_408_cast_fp16 = sub(x = var_406_cast_fp16, y = var_407_cast_fp16)[name = string("op_408_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_409_cast_fp16 = mul(x = x2_7_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_409_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_410_cast_fp16 = mul(x = x1_7_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_410_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_411_cast_fp16 = add(x = var_409_cast_fp16, y = var_410_cast_fp16)[name = string("op_411_cast_fp16")];
bool var_413_interleave_0 = const()[name = string("op_413_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_413_cast_fp16 = concat(axis = var_293, interleave = var_413_interleave_0, values = (var_408_cast_fp16, var_411_cast_fp16))[name = string("op_413_cast_fp16")];
tensor<int32, [4]> transpose_5_perm_0 = const()[name = string("transpose_5_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_22 = const()[name = string("concat_22"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = var_413_cast_fp16)[name = string("transpose_88")];
tensor<fp16, [128, 1024]> reshape_7_cast_fp16 = reshape(shape = concat_22, x = transpose_5_cast_fp16)[name = string("reshape_7_cast_fp16")];
bool matmul_2_transpose_x_1 = const()[name = string("matmul_2_transpose_x_1"), val = bool(true)];
bool matmul_2_transpose_y_1 = const()[name = string("matmul_2_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_1, transpose_y = matmul_2_transpose_y_1, x = var_68_to_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")];
tensor<int32, [4]> concat_25 = const()[name = string("concat_25"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_8_cast_fp16 = reshape(shape = concat_25, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")];
tensor<int32, [4]> scattered_k_3_perm_0 = const()[name = string("scattered_k_3_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_57_cast_fp16 = transpose(perm = transpose_57_perm_0, x = var_336_cast_fp16)[name = string("transpose_87")];
tensor<fp16, [128, 1024]> reshape_10_cast_fp16 = reshape(shape = concat_30, x = transpose_57_cast_fp16)[name = string("reshape_10_cast_fp16")];
bool matmul_3_transpose_x_1 = const()[name = string("matmul_3_transpose_x_1"), val = bool(true)];
bool matmul_3_transpose_y_1 = const()[name = string("matmul_3_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = var_68_to_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")];
tensor<int32, [4]> concat_33 = const()[name = string("concat_33"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_11_cast_fp16 = reshape(shape = concat_33, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")];
tensor<int32, [4]> scattered_v_3_perm_0 = const()[name = string("scattered_v_3_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_2 = read_state(input = k_cache_1)[name = string("read_state_2")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_9_cast_fp16 = mul(x = read_state_2, y = var_224_cast_fp16)[name = string("k_cache_9_cast_fp16")];
write_state(data = k_cache_9_cast_fp16, input = k_cache_1)[name = string("coreml_update_state_60_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_60 = read_state(input = k_cache_1)[name = string("coreml_update_state_60")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_3_cast_fp16 = transpose(perm = scattered_k_3_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_86")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_11_cast_fp16 = add(x = coreml_update_state_60, y = scattered_k_3_cast_fp16)[name = string("k_cache_11_cast_fp16")];
write_state(data = k_cache_11_cast_fp16, input = k_cache_1)[name = string("coreml_update_state_61_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_61 = read_state(input = k_cache_1)[name = string("coreml_update_state_61")];
tensor<fp16, [1, 8, 1024, 128]> read_state_3 = read_state(input = v_cache_1)[name = string("read_state_3")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_9_cast_fp16 = mul(x = read_state_3, y = var_224_cast_fp16)[name = string("v_cache_9_cast_fp16")];
write_state(data = v_cache_9_cast_fp16, input = v_cache_1)[name = string("coreml_update_state_62_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_62 = read_state(input = v_cache_1)[name = string("coreml_update_state_62")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_3_cast_fp16 = transpose(perm = scattered_v_3_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_85")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_11_cast_fp16 = add(x = coreml_update_state_62, y = scattered_v_3_cast_fp16)[name = string("v_cache_11_cast_fp16")];
write_state(data = v_cache_11_cast_fp16, input = v_cache_1)[name = string("coreml_update_state_63_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_63 = read_state(input = v_cache_1)[name = string("coreml_update_state_63")];
tensor<int32, [1]> var_424_axes_0 = const()[name = string("op_424_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_424_cast_fp16 = expand_dims(axes = var_424_axes_0, x = coreml_update_state_61)[name = string("op_424_cast_fp16")];
tensor<int32, [5]> k_exp_5_reps_0 = const()[name = string("k_exp_5_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_5_cast_fp16 = tile(reps = k_exp_5_reps_0, x = var_424_cast_fp16)[name = string("k_exp_5_cast_fp16")];
tensor<int32, [4]> var_427 = const()[name = string("op_427"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_7_cast_fp16 = reshape(shape = var_427, x = k_exp_5_cast_fp16)[name = string("k_exp_7_cast_fp16")];
tensor<int32, [1]> var_429_axes_0 = const()[name = string("op_429_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_429_cast_fp16 = expand_dims(axes = var_429_axes_0, x = coreml_update_state_63)[name = string("op_429_cast_fp16")];
tensor<int32, [5]> v_exp_5_reps_0 = const()[name = string("v_exp_5_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_5_cast_fp16 = tile(reps = v_exp_5_reps_0, x = var_429_cast_fp16)[name = string("v_exp_5_cast_fp16")];
tensor<int32, [4]> var_432 = const()[name = string("op_432"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_7_cast_fp16 = reshape(shape = var_432, x = v_exp_5_cast_fp16)[name = string("v_exp_7_cast_fp16")];
bool var_435_transpose_x_1 = const()[name = string("op_435_transpose_x_1"), val = bool(false)];
bool var_435_transpose_y_1 = const()[name = string("op_435_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_435_cast_fp16 = matmul(transpose_x = var_435_transpose_x_1, transpose_y = var_435_transpose_y_1, x = q_3_cast_fp16, y = k_exp_7_cast_fp16)[name = string("op_435_cast_fp16")];
fp16 var_436_to_fp16 = const()[name = string("op_436_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_5_cast_fp16 = mul(x = var_435_cast_fp16, y = var_436_to_fp16)[name = string("attn_5_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_11_cast_fp16 = add(x = attn_5_cast_fp16, y = attention_mask_to_fp16)[name = string("input_11_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_7_cast_fp16 = softmax(axis = var_293, x = input_11_cast_fp16)[name = string("attn_7_cast_fp16")];
bool out_3_transpose_x_0 = const()[name = string("out_3_transpose_x_0"), val = bool(false)];
bool out_3_transpose_y_0 = const()[name = string("out_3_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_3_cast_fp16 = matmul(transpose_x = out_3_transpose_x_0, transpose_y = out_3_transpose_y_0, x = attn_7_cast_fp16, y = v_exp_7_cast_fp16)[name = string("out_3_cast_fp16")];
tensor<int32, [4]> var_441_perm_0 = const()[name = string("op_441_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_442 = const()[name = string("op_442"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_441_cast_fp16 = transpose(perm = var_441_perm_0, x = out_3_cast_fp16)[name = string("transpose_84")];
tensor<fp16, [1, 128, 2048]> input_13_cast_fp16 = reshape(shape = var_442, x = var_441_cast_fp16)[name = string("input_13_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949696))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22046912))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("linear_10_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_45_cast_fp16 = add(x = x_25_cast_fp16, y = linear_10_cast_fp16)[name = string("x_45_cast_fp16")];
fp16 var_292_promoted_3_to_fp16 = const()[name = string("op_292_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_449_cast_fp16 = pow(x = x_45_cast_fp16, y = var_292_promoted_3_to_fp16)[name = string("op_449_cast_fp16")];
tensor<int32, [1]> var_451_axes_0 = const()[name = string("op_451_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_451_keep_dims_0 = const()[name = string("op_451_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_451_cast_fp16 = reduce_mean(axes = var_451_axes_0, keep_dims = var_451_keep_dims_0, x = var_449_cast_fp16)[name = string("op_451_cast_fp16")];
fp16 var_452_to_fp16 = const()[name = string("op_452_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_453_cast_fp16 = add(x = var_451_cast_fp16, y = var_452_to_fp16)[name = string("op_453_cast_fp16")];
fp32 norm_15_epsilon_0 = const()[name = string("norm_15_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_15_cast_fp16 = rsqrt(epsilon = norm_15_epsilon_0, x = var_453_cast_fp16)[name = string("norm_15_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_455_cast_fp16 = mul(x = x_45_cast_fp16, y = norm_15_cast_fp16)[name = string("op_455_cast_fp16")];
tensor<fp16, [1024]> layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22047488)))];
tensor<fp16, [1, 128, 1024]> var_456_cast_fp16 = mul(x = var_455_cast_fp16, y = layers_1_post_attention_layernorm_weight_to_fp16)[name = string("op_456_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22049600))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195392))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = var_456_cast_fp16)[name = string("linear_11_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_466_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_466_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195968))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28341760))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = var_456_cast_fp16)[name = string("linear_12_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_19_cast_fp16 = mul(x = var_466_cast_fp16, y = linear_12_cast_fp16)[name = string("input_19_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28342336))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488128))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_13_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_51_cast_fp16 = add(x = x_45_cast_fp16, y = linear_13_cast_fp16)[name = string("x_51_cast_fp16")];
int32 var_487 = const()[name = string("op_487"), val = int32(-1)];
fp16 var_486_promoted_to_fp16 = const()[name = string("op_486_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_496_cast_fp16 = pow(x = x_51_cast_fp16, y = var_486_promoted_to_fp16)[name = string("op_496_cast_fp16")];
tensor<int32, [1]> var_498_axes_0 = const()[name = string("op_498_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_498_keep_dims_0 = const()[name = string("op_498_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_498_cast_fp16 = reduce_mean(axes = var_498_axes_0, keep_dims = var_498_keep_dims_0, x = var_496_cast_fp16)[name = string("op_498_cast_fp16")];
fp16 var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_500_cast_fp16 = add(x = var_498_cast_fp16, y = var_499_to_fp16)[name = string("op_500_cast_fp16")];
fp32 norm_17_epsilon_0 = const()[name = string("norm_17_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_17_cast_fp16 = rsqrt(epsilon = norm_17_epsilon_0, x = var_500_cast_fp16)[name = string("norm_17_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_502_cast_fp16 = mul(x = x_51_cast_fp16, y = norm_17_cast_fp16)[name = string("op_502_cast_fp16")];
tensor<fp16, [1024]> layers_2_input_layernorm_weight_to_fp16 = const()[name = string("layers_2_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488704)))];
tensor<fp16, [1, 128, 1024]> var_503_cast_fp16 = mul(x = var_502_cast_fp16, y = layers_2_input_layernorm_weight_to_fp16)[name = string("op_503_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490816))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588032))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = var_503_cast_fp16)[name = string("linear_14_cast_fp16")];
tensor<int32, [4]> var_519 = const()[name = string("op_519"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_520_cast_fp16 = reshape(shape = var_519, x = linear_14_cast_fp16)[name = string("op_520_cast_fp16")];
tensor<int32, [4]> x_57_perm_0 = const()[name = string("x_57_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588608))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637248))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = var_503_cast_fp16)[name = string("linear_15_cast_fp16")];
tensor<int32, [4]> var_524 = const()[name = string("op_524"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_525_cast_fp16 = reshape(shape = var_524, x = linear_15_cast_fp16)[name = string("op_525_cast_fp16")];
tensor<int32, [4]> x_61_perm_0 = const()[name = string("x_61_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637824))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35686464))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = var_503_cast_fp16)[name = string("linear_16_cast_fp16")];
tensor<int32, [4]> var_529 = const()[name = string("op_529"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_530_cast_fp16 = reshape(shape = var_529, x = linear_16_cast_fp16)[name = string("op_530_cast_fp16")];
tensor<int32, [4]> transpose_58_perm_0 = const()[name = string("transpose_58_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_486_promoted_1_to_fp16 = const()[name = string("op_486_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_57_cast_fp16 = transpose(perm = x_57_perm_0, x = var_520_cast_fp16)[name = string("transpose_83")];
tensor<fp16, [1, 16, 128, 128]> var_534_cast_fp16 = pow(x = x_57_cast_fp16, y = var_486_promoted_1_to_fp16)[name = string("op_534_cast_fp16")];
tensor<int32, [1]> var_536_axes_0 = const()[name = string("op_536_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_536_keep_dims_0 = const()[name = string("op_536_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_536_cast_fp16 = reduce_mean(axes = var_536_axes_0, keep_dims = var_536_keep_dims_0, x = var_534_cast_fp16)[name = string("op_536_cast_fp16")];
fp16 var_537_to_fp16 = const()[name = string("op_537_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_538_cast_fp16 = add(x = var_536_cast_fp16, y = var_537_to_fp16)[name = string("op_538_cast_fp16")];
fp32 norm_19_epsilon_0 = const()[name = string("norm_19_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_19_cast_fp16 = rsqrt(epsilon = norm_19_epsilon_0, x = var_538_cast_fp16)[name = string("norm_19_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_540_cast_fp16 = mul(x = x_57_cast_fp16, y = norm_19_cast_fp16)[name = string("op_540_cast_fp16")];
tensor<fp16, [128]> layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687040)))];
tensor<fp16, [1, 16, 128, 128]> var_541_cast_fp16 = mul(x = var_540_cast_fp16, y = layers_2_self_attn_q_norm_weight_to_fp16)[name = string("op_541_cast_fp16")];
fp16 var_486_promoted_2_to_fp16 = const()[name = string("op_486_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_61_cast_fp16 = transpose(perm = x_61_perm_0, x = var_525_cast_fp16)[name = string("transpose_82")];
tensor<fp16, [1, 8, 128, 128]> var_545_cast_fp16 = pow(x = x_61_cast_fp16, y = var_486_promoted_2_to_fp16)[name = string("op_545_cast_fp16")];
tensor<int32, [1]> var_547_axes_0 = const()[name = string("op_547_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_547_keep_dims_0 = const()[name = string("op_547_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_547_cast_fp16 = reduce_mean(axes = var_547_axes_0, keep_dims = var_547_keep_dims_0, x = var_545_cast_fp16)[name = string("op_547_cast_fp16")];
fp16 var_548_to_fp16 = const()[name = string("op_548_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_549_cast_fp16 = add(x = var_547_cast_fp16, y = var_548_to_fp16)[name = string("op_549_cast_fp16")];
fp32 norm_21_epsilon_0 = const()[name = string("norm_21_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_21_cast_fp16 = rsqrt(epsilon = norm_21_epsilon_0, x = var_549_cast_fp16)[name = string("norm_21_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_551_cast_fp16 = mul(x = x_61_cast_fp16, y = norm_21_cast_fp16)[name = string("op_551_cast_fp16")];
tensor<fp16, [128]> layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687360)))];
tensor<fp16, [1, 8, 128, 128]> var_552_cast_fp16 = mul(x = var_551_cast_fp16, y = layers_2_self_attn_k_norm_weight_to_fp16)[name = string("op_552_cast_fp16")];
tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_541_cast_fp16)[name = string("x1_9_cast_fp16")];
tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_541_cast_fp16)[name = string("x2_9_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_573_cast_fp16 = mul(x = x1_9_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_573_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_574_cast_fp16 = mul(x = x2_9_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_574_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_575_cast_fp16 = sub(x = var_573_cast_fp16, y = var_574_cast_fp16)[name = string("op_575_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_576_cast_fp16 = mul(x = x2_9_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_576_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_577_cast_fp16 = mul(x = x1_9_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_577_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_578_cast_fp16 = add(x = var_576_cast_fp16, y = var_577_cast_fp16)[name = string("op_578_cast_fp16")];
bool q_5_interleave_0 = const()[name = string("q_5_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_5_cast_fp16 = concat(axis = var_487, interleave = q_5_interleave_0, values = (var_575_cast_fp16, var_578_cast_fp16))[name = string("q_5_cast_fp16")];
tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_552_cast_fp16)[name = string("x1_11_cast_fp16")];
tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_552_cast_fp16)[name = string("x2_11_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_600_cast_fp16 = mul(x = x1_11_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_600_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_601_cast_fp16 = mul(x = x2_11_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_601_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_602_cast_fp16 = sub(x = var_600_cast_fp16, y = var_601_cast_fp16)[name = string("op_602_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_603_cast_fp16 = mul(x = x2_11_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_603_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_604_cast_fp16 = mul(x = x1_11_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_604_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_605_cast_fp16 = add(x = var_603_cast_fp16, y = var_604_cast_fp16)[name = string("op_605_cast_fp16")];
bool var_607_interleave_0 = const()[name = string("op_607_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_607_cast_fp16 = concat(axis = var_487, interleave = var_607_interleave_0, values = (var_602_cast_fp16, var_605_cast_fp16))[name = string("op_607_cast_fp16")];
tensor<int32, [4]> transpose_9_perm_0 = const()[name = string("transpose_9_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_40 = const()[name = string("concat_40"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = var_607_cast_fp16)[name = string("transpose_81")];
tensor<fp16, [128, 1024]> reshape_13_cast_fp16 = reshape(shape = concat_40, x = transpose_9_cast_fp16)[name = string("reshape_13_cast_fp16")];
bool matmul_4_transpose_x_1 = const()[name = string("matmul_4_transpose_x_1"), val = bool(true)];
bool matmul_4_transpose_y_1 = const()[name = string("matmul_4_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_1, transpose_y = matmul_4_transpose_y_1, x = var_68_to_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")];
tensor<int32, [4]> concat_43 = const()[name = string("concat_43"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_14_cast_fp16 = reshape(shape = concat_43, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")];
tensor<int32, [4]> scattered_k_5_perm_0 = const()[name = string("scattered_k_5_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_48 = const()[name = string("concat_48"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_58_cast_fp16 = transpose(perm = transpose_58_perm_0, x = var_530_cast_fp16)[name = string("transpose_80")];
tensor<fp16, [128, 1024]> reshape_16_cast_fp16 = reshape(shape = concat_48, x = transpose_58_cast_fp16)[name = string("reshape_16_cast_fp16")];
bool matmul_5_transpose_x_1 = const()[name = string("matmul_5_transpose_x_1"), val = bool(true)];
bool matmul_5_transpose_y_1 = const()[name = string("matmul_5_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_1, transpose_y = matmul_5_transpose_y_1, x = var_68_to_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")];
tensor<int32, [4]> concat_51 = const()[name = string("concat_51"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_17_cast_fp16 = reshape(shape = concat_51, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")];
tensor<int32, [4]> scattered_v_5_perm_0 = const()[name = string("scattered_v_5_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_4 = read_state(input = k_cache_2)[name = string("read_state_4")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_15_cast_fp16 = mul(x = read_state_4, y = var_224_cast_fp16)[name = string("k_cache_15_cast_fp16")];
write_state(data = k_cache_15_cast_fp16, input = k_cache_2)[name = string("coreml_update_state_64_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_64 = read_state(input = k_cache_2)[name = string("coreml_update_state_64")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_5_cast_fp16 = transpose(perm = scattered_k_5_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_79")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_17_cast_fp16 = add(x = coreml_update_state_64, y = scattered_k_5_cast_fp16)[name = string("k_cache_17_cast_fp16")];
write_state(data = k_cache_17_cast_fp16, input = k_cache_2)[name = string("coreml_update_state_65_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_65 = read_state(input = k_cache_2)[name = string("coreml_update_state_65")];
tensor<fp16, [1, 8, 1024, 128]> read_state_5 = read_state(input = v_cache_2)[name = string("read_state_5")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_15_cast_fp16 = mul(x = read_state_5, y = var_224_cast_fp16)[name = string("v_cache_15_cast_fp16")];
write_state(data = v_cache_15_cast_fp16, input = v_cache_2)[name = string("coreml_update_state_66_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_66 = read_state(input = v_cache_2)[name = string("coreml_update_state_66")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_5_cast_fp16 = transpose(perm = scattered_v_5_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_78")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_17_cast_fp16 = add(x = coreml_update_state_66, y = scattered_v_5_cast_fp16)[name = string("v_cache_17_cast_fp16")];
write_state(data = v_cache_17_cast_fp16, input = v_cache_2)[name = string("coreml_update_state_67_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_67 = read_state(input = v_cache_2)[name = string("coreml_update_state_67")];
tensor<int32, [1]> var_618_axes_0 = const()[name = string("op_618_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_618_cast_fp16 = expand_dims(axes = var_618_axes_0, x = coreml_update_state_65)[name = string("op_618_cast_fp16")];
tensor<int32, [5]> k_exp_9_reps_0 = const()[name = string("k_exp_9_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_9_cast_fp16 = tile(reps = k_exp_9_reps_0, x = var_618_cast_fp16)[name = string("k_exp_9_cast_fp16")];
tensor<int32, [4]> var_621 = const()[name = string("op_621"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_11_cast_fp16 = reshape(shape = var_621, x = k_exp_9_cast_fp16)[name = string("k_exp_11_cast_fp16")];
tensor<int32, [1]> var_623_axes_0 = const()[name = string("op_623_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_623_cast_fp16 = expand_dims(axes = var_623_axes_0, x = coreml_update_state_67)[name = string("op_623_cast_fp16")];
tensor<int32, [5]> v_exp_9_reps_0 = const()[name = string("v_exp_9_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_9_cast_fp16 = tile(reps = v_exp_9_reps_0, x = var_623_cast_fp16)[name = string("v_exp_9_cast_fp16")];
tensor<int32, [4]> var_626 = const()[name = string("op_626"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_11_cast_fp16 = reshape(shape = var_626, x = v_exp_9_cast_fp16)[name = string("v_exp_11_cast_fp16")];
bool var_629_transpose_x_1 = const()[name = string("op_629_transpose_x_1"), val = bool(false)];
bool var_629_transpose_y_1 = const()[name = string("op_629_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_629_cast_fp16 = matmul(transpose_x = var_629_transpose_x_1, transpose_y = var_629_transpose_y_1, x = q_5_cast_fp16, y = k_exp_11_cast_fp16)[name = string("op_629_cast_fp16")];
fp16 var_630_to_fp16 = const()[name = string("op_630_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_9_cast_fp16 = mul(x = var_629_cast_fp16, y = var_630_to_fp16)[name = string("attn_9_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_21_cast_fp16 = add(x = attn_9_cast_fp16, y = attention_mask_to_fp16)[name = string("input_21_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_11_cast_fp16 = softmax(axis = var_487, x = input_21_cast_fp16)[name = string("attn_11_cast_fp16")];
bool out_5_transpose_x_0 = const()[name = string("out_5_transpose_x_0"), val = bool(false)];
bool out_5_transpose_y_0 = const()[name = string("out_5_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_5_cast_fp16 = matmul(transpose_x = out_5_transpose_x_0, transpose_y = out_5_transpose_y_0, x = attn_11_cast_fp16, y = v_exp_11_cast_fp16)[name = string("out_5_cast_fp16")];
tensor<int32, [4]> var_635_perm_0 = const()[name = string("op_635_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_636 = const()[name = string("op_636"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_635_cast_fp16 = transpose(perm = var_635_perm_0, x = out_5_cast_fp16)[name = string("transpose_77")];
tensor<fp16, [1, 128, 2048]> input_23_cast_fp16 = reshape(shape = var_636, x = var_635_cast_fp16)[name = string("input_23_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687680))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37784896))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("linear_17_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_71_cast_fp16 = add(x = x_51_cast_fp16, y = linear_17_cast_fp16)[name = string("x_71_cast_fp16")];
fp16 var_486_promoted_3_to_fp16 = const()[name = string("op_486_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_643_cast_fp16 = pow(x = x_71_cast_fp16, y = var_486_promoted_3_to_fp16)[name = string("op_643_cast_fp16")];
tensor<int32, [1]> var_645_axes_0 = const()[name = string("op_645_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_645_keep_dims_0 = const()[name = string("op_645_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_645_cast_fp16 = reduce_mean(axes = var_645_axes_0, keep_dims = var_645_keep_dims_0, x = var_643_cast_fp16)[name = string("op_645_cast_fp16")];
fp16 var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_647_cast_fp16 = add(x = var_645_cast_fp16, y = var_646_to_fp16)[name = string("op_647_cast_fp16")];
fp32 norm_23_epsilon_0 = const()[name = string("norm_23_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_23_cast_fp16 = rsqrt(epsilon = norm_23_epsilon_0, x = var_647_cast_fp16)[name = string("norm_23_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_649_cast_fp16 = mul(x = x_71_cast_fp16, y = norm_23_cast_fp16)[name = string("op_649_cast_fp16")];
tensor<fp16, [1024]> layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37785472)))];
tensor<fp16, [1, 128, 1024]> var_650_cast_fp16 = mul(x = var_649_cast_fp16, y = layers_2_post_attention_layernorm_weight_to_fp16)[name = string("op_650_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37787584))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933376))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = var_650_cast_fp16)[name = string("linear_18_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_660_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_660_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933952))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44079744))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = var_650_cast_fp16)[name = string("linear_19_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_29_cast_fp16 = mul(x = var_660_cast_fp16, y = linear_19_cast_fp16)[name = string("input_29_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44080320))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226112))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("linear_20_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_77_cast_fp16 = add(x = x_71_cast_fp16, y = linear_20_cast_fp16)[name = string("x_77_cast_fp16")];
int32 var_681 = const()[name = string("op_681"), val = int32(-1)];
fp16 var_680_promoted_to_fp16 = const()[name = string("op_680_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_690_cast_fp16 = pow(x = x_77_cast_fp16, y = var_680_promoted_to_fp16)[name = string("op_690_cast_fp16")];
tensor<int32, [1]> var_692_axes_0 = const()[name = string("op_692_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_692_keep_dims_0 = const()[name = string("op_692_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_692_cast_fp16 = reduce_mean(axes = var_692_axes_0, keep_dims = var_692_keep_dims_0, x = var_690_cast_fp16)[name = string("op_692_cast_fp16")];
fp16 var_693_to_fp16 = const()[name = string("op_693_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_694_cast_fp16 = add(x = var_692_cast_fp16, y = var_693_to_fp16)[name = string("op_694_cast_fp16")];
fp32 norm_25_epsilon_0 = const()[name = string("norm_25_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_25_cast_fp16 = rsqrt(epsilon = norm_25_epsilon_0, x = var_694_cast_fp16)[name = string("norm_25_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_696_cast_fp16 = mul(x = x_77_cast_fp16, y = norm_25_cast_fp16)[name = string("op_696_cast_fp16")];
tensor<fp16, [1024]> layers_3_input_layernorm_weight_to_fp16 = const()[name = string("layers_3_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226688)))];
tensor<fp16, [1, 128, 1024]> var_697_cast_fp16 = mul(x = var_696_cast_fp16, y = layers_3_input_layernorm_weight_to_fp16)[name = string("op_697_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228800))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326016))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = var_697_cast_fp16)[name = string("linear_21_cast_fp16")];
tensor<int32, [4]> var_713 = const()[name = string("op_713"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_714_cast_fp16 = reshape(shape = var_713, x = linear_21_cast_fp16)[name = string("op_714_cast_fp16")];
tensor<int32, [4]> x_83_perm_0 = const()[name = string("x_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326592))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375232))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = var_697_cast_fp16)[name = string("linear_22_cast_fp16")];
tensor<int32, [4]> var_718 = const()[name = string("op_718"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_719_cast_fp16 = reshape(shape = var_718, x = linear_22_cast_fp16)[name = string("op_719_cast_fp16")];
tensor<int32, [4]> x_87_perm_0 = const()[name = string("x_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375808))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51424448))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = var_697_cast_fp16)[name = string("linear_23_cast_fp16")];
tensor<int32, [4]> var_723 = const()[name = string("op_723"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_724_cast_fp16 = reshape(shape = var_723, x = linear_23_cast_fp16)[name = string("op_724_cast_fp16")];
tensor<int32, [4]> transpose_59_perm_0 = const()[name = string("transpose_59_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_680_promoted_1_to_fp16 = const()[name = string("op_680_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_83_cast_fp16 = transpose(perm = x_83_perm_0, x = var_714_cast_fp16)[name = string("transpose_76")];
tensor<fp16, [1, 16, 128, 128]> var_728_cast_fp16 = pow(x = x_83_cast_fp16, y = var_680_promoted_1_to_fp16)[name = string("op_728_cast_fp16")];
tensor<int32, [1]> var_730_axes_0 = const()[name = string("op_730_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_730_keep_dims_0 = const()[name = string("op_730_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_730_cast_fp16 = reduce_mean(axes = var_730_axes_0, keep_dims = var_730_keep_dims_0, x = var_728_cast_fp16)[name = string("op_730_cast_fp16")];
fp16 var_731_to_fp16 = const()[name = string("op_731_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_732_cast_fp16 = add(x = var_730_cast_fp16, y = var_731_to_fp16)[name = string("op_732_cast_fp16")];
fp32 norm_27_epsilon_0 = const()[name = string("norm_27_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_27_cast_fp16 = rsqrt(epsilon = norm_27_epsilon_0, x = var_732_cast_fp16)[name = string("norm_27_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_734_cast_fp16 = mul(x = x_83_cast_fp16, y = norm_27_cast_fp16)[name = string("op_734_cast_fp16")];
tensor<fp16, [128]> layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425024)))];
tensor<fp16, [1, 16, 128, 128]> var_735_cast_fp16 = mul(x = var_734_cast_fp16, y = layers_3_self_attn_q_norm_weight_to_fp16)[name = string("op_735_cast_fp16")];
fp16 var_680_promoted_2_to_fp16 = const()[name = string("op_680_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_87_cast_fp16 = transpose(perm = x_87_perm_0, x = var_719_cast_fp16)[name = string("transpose_75")];
tensor<fp16, [1, 8, 128, 128]> var_739_cast_fp16 = pow(x = x_87_cast_fp16, y = var_680_promoted_2_to_fp16)[name = string("op_739_cast_fp16")];
tensor<int32, [1]> var_741_axes_0 = const()[name = string("op_741_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_741_keep_dims_0 = const()[name = string("op_741_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_741_cast_fp16 = reduce_mean(axes = var_741_axes_0, keep_dims = var_741_keep_dims_0, x = var_739_cast_fp16)[name = string("op_741_cast_fp16")];
fp16 var_742_to_fp16 = const()[name = string("op_742_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_743_cast_fp16 = add(x = var_741_cast_fp16, y = var_742_to_fp16)[name = string("op_743_cast_fp16")];
fp32 norm_29_epsilon_0 = const()[name = string("norm_29_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_29_cast_fp16 = rsqrt(epsilon = norm_29_epsilon_0, x = var_743_cast_fp16)[name = string("norm_29_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_745_cast_fp16 = mul(x = x_87_cast_fp16, y = norm_29_cast_fp16)[name = string("op_745_cast_fp16")];
tensor<fp16, [128]> layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425344)))];
tensor<fp16, [1, 8, 128, 128]> var_746_cast_fp16 = mul(x = var_745_cast_fp16, y = layers_3_self_attn_k_norm_weight_to_fp16)[name = string("op_746_cast_fp16")];
tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_735_cast_fp16)[name = string("x1_13_cast_fp16")];
tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_735_cast_fp16)[name = string("x2_13_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_767_cast_fp16 = mul(x = x1_13_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_767_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_768_cast_fp16 = mul(x = x2_13_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_768_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_769_cast_fp16 = sub(x = var_767_cast_fp16, y = var_768_cast_fp16)[name = string("op_769_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_770_cast_fp16 = mul(x = x2_13_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_770_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_771_cast_fp16 = mul(x = x1_13_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_771_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_772_cast_fp16 = add(x = var_770_cast_fp16, y = var_771_cast_fp16)[name = string("op_772_cast_fp16")];
bool q_7_interleave_0 = const()[name = string("q_7_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_7_cast_fp16 = concat(axis = var_681, interleave = q_7_interleave_0, values = (var_769_cast_fp16, var_772_cast_fp16))[name = string("q_7_cast_fp16")];
tensor<int32, [4]> x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_746_cast_fp16)[name = string("x1_15_cast_fp16")];
tensor<int32, [4]> x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_746_cast_fp16)[name = string("x2_15_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_794_cast_fp16 = mul(x = x1_15_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_794_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_795_cast_fp16 = mul(x = x2_15_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_795_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_796_cast_fp16 = sub(x = var_794_cast_fp16, y = var_795_cast_fp16)[name = string("op_796_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_797_cast_fp16 = mul(x = x2_15_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_797_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_798_cast_fp16 = mul(x = x1_15_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_798_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_799_cast_fp16 = add(x = var_797_cast_fp16, y = var_798_cast_fp16)[name = string("op_799_cast_fp16")];
bool var_801_interleave_0 = const()[name = string("op_801_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_801_cast_fp16 = concat(axis = var_681, interleave = var_801_interleave_0, values = (var_796_cast_fp16, var_799_cast_fp16))[name = string("op_801_cast_fp16")];
tensor<int32, [4]> transpose_13_perm_0 = const()[name = string("transpose_13_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_58 = const()[name = string("concat_58"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = var_801_cast_fp16)[name = string("transpose_74")];
tensor<fp16, [128, 1024]> reshape_19_cast_fp16 = reshape(shape = concat_58, x = transpose_13_cast_fp16)[name = string("reshape_19_cast_fp16")];
bool matmul_6_transpose_x_1 = const()[name = string("matmul_6_transpose_x_1"), val = bool(true)];
bool matmul_6_transpose_y_1 = const()[name = string("matmul_6_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_1, transpose_y = matmul_6_transpose_y_1, x = var_68_to_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")];
tensor<int32, [4]> concat_61 = const()[name = string("concat_61"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_20_cast_fp16 = reshape(shape = concat_61, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")];
tensor<int32, [4]> scattered_k_7_perm_0 = const()[name = string("scattered_k_7_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_66 = const()[name = string("concat_66"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_59_cast_fp16 = transpose(perm = transpose_59_perm_0, x = var_724_cast_fp16)[name = string("transpose_73")];
tensor<fp16, [128, 1024]> reshape_22_cast_fp16 = reshape(shape = concat_66, x = transpose_59_cast_fp16)[name = string("reshape_22_cast_fp16")];
bool matmul_7_transpose_x_1 = const()[name = string("matmul_7_transpose_x_1"), val = bool(true)];
bool matmul_7_transpose_y_1 = const()[name = string("matmul_7_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_1, transpose_y = matmul_7_transpose_y_1, x = var_68_to_fp16, y = reshape_22_cast_fp16)[name = string("matmul_7_cast_fp16")];
tensor<int32, [4]> concat_69 = const()[name = string("concat_69"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_23_cast_fp16 = reshape(shape = concat_69, x = matmul_7_cast_fp16)[name = string("reshape_23_cast_fp16")];
tensor<int32, [4]> scattered_v_7_perm_0 = const()[name = string("scattered_v_7_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_6 = read_state(input = k_cache_3)[name = string("read_state_6")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_21_cast_fp16 = mul(x = read_state_6, y = var_224_cast_fp16)[name = string("k_cache_21_cast_fp16")];
write_state(data = k_cache_21_cast_fp16, input = k_cache_3)[name = string("coreml_update_state_68_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_68 = read_state(input = k_cache_3)[name = string("coreml_update_state_68")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_7_cast_fp16 = transpose(perm = scattered_k_7_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_72")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_23_cast_fp16 = add(x = coreml_update_state_68, y = scattered_k_7_cast_fp16)[name = string("k_cache_23_cast_fp16")];
write_state(data = k_cache_23_cast_fp16, input = k_cache_3)[name = string("coreml_update_state_69_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_69 = read_state(input = k_cache_3)[name = string("coreml_update_state_69")];
tensor<fp16, [1, 8, 1024, 128]> read_state_7 = read_state(input = v_cache_3)[name = string("read_state_7")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_21_cast_fp16 = mul(x = read_state_7, y = var_224_cast_fp16)[name = string("v_cache_21_cast_fp16")];
write_state(data = v_cache_21_cast_fp16, input = v_cache_3)[name = string("coreml_update_state_70_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_70 = read_state(input = v_cache_3)[name = string("coreml_update_state_70")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_7_cast_fp16 = transpose(perm = scattered_v_7_perm_0, x = reshape_23_cast_fp16)[name = string("transpose_71")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_23_cast_fp16 = add(x = coreml_update_state_70, y = scattered_v_7_cast_fp16)[name = string("v_cache_23_cast_fp16")];
write_state(data = v_cache_23_cast_fp16, input = v_cache_3)[name = string("coreml_update_state_71_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_71 = read_state(input = v_cache_3)[name = string("coreml_update_state_71")];
tensor<int32, [1]> var_812_axes_0 = const()[name = string("op_812_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_812_cast_fp16 = expand_dims(axes = var_812_axes_0, x = coreml_update_state_69)[name = string("op_812_cast_fp16")];
tensor<int32, [5]> k_exp_13_reps_0 = const()[name = string("k_exp_13_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_13_cast_fp16 = tile(reps = k_exp_13_reps_0, x = var_812_cast_fp16)[name = string("k_exp_13_cast_fp16")];
tensor<int32, [4]> var_815 = const()[name = string("op_815"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_15_cast_fp16 = reshape(shape = var_815, x = k_exp_13_cast_fp16)[name = string("k_exp_15_cast_fp16")];
tensor<int32, [1]> var_817_axes_0 = const()[name = string("op_817_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_817_cast_fp16 = expand_dims(axes = var_817_axes_0, x = coreml_update_state_71)[name = string("op_817_cast_fp16")];
tensor<int32, [5]> v_exp_13_reps_0 = const()[name = string("v_exp_13_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_13_cast_fp16 = tile(reps = v_exp_13_reps_0, x = var_817_cast_fp16)[name = string("v_exp_13_cast_fp16")];
tensor<int32, [4]> var_820 = const()[name = string("op_820"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_15_cast_fp16 = reshape(shape = var_820, x = v_exp_13_cast_fp16)[name = string("v_exp_15_cast_fp16")];
bool var_823_transpose_x_1 = const()[name = string("op_823_transpose_x_1"), val = bool(false)];
bool var_823_transpose_y_1 = const()[name = string("op_823_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_823_cast_fp16 = matmul(transpose_x = var_823_transpose_x_1, transpose_y = var_823_transpose_y_1, x = q_7_cast_fp16, y = k_exp_15_cast_fp16)[name = string("op_823_cast_fp16")];
fp16 var_824_to_fp16 = const()[name = string("op_824_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_13_cast_fp16 = mul(x = var_823_cast_fp16, y = var_824_to_fp16)[name = string("attn_13_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_31_cast_fp16 = add(x = attn_13_cast_fp16, y = attention_mask_to_fp16)[name = string("input_31_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_15_cast_fp16 = softmax(axis = var_681, x = input_31_cast_fp16)[name = string("attn_15_cast_fp16")];
bool out_7_transpose_x_0 = const()[name = string("out_7_transpose_x_0"), val = bool(false)];
bool out_7_transpose_y_0 = const()[name = string("out_7_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_7_cast_fp16 = matmul(transpose_x = out_7_transpose_x_0, transpose_y = out_7_transpose_y_0, x = attn_15_cast_fp16, y = v_exp_15_cast_fp16)[name = string("out_7_cast_fp16")];
tensor<int32, [4]> var_829_perm_0 = const()[name = string("op_829_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_830 = const()[name = string("op_830"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_829_cast_fp16 = transpose(perm = var_829_perm_0, x = out_7_cast_fp16)[name = string("transpose_70")];
tensor<fp16, [1, 128, 2048]> input_33_cast_fp16 = reshape(shape = var_830, x = var_829_cast_fp16)[name = string("input_33_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425664))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53522880))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_24_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_97_cast_fp16 = add(x = x_77_cast_fp16, y = linear_24_cast_fp16)[name = string("x_97_cast_fp16")];
fp16 var_680_promoted_3_to_fp16 = const()[name = string("op_680_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_837_cast_fp16 = pow(x = x_97_cast_fp16, y = var_680_promoted_3_to_fp16)[name = string("op_837_cast_fp16")];
tensor<int32, [1]> var_839_axes_0 = const()[name = string("op_839_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_839_keep_dims_0 = const()[name = string("op_839_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_839_cast_fp16 = reduce_mean(axes = var_839_axes_0, keep_dims = var_839_keep_dims_0, x = var_837_cast_fp16)[name = string("op_839_cast_fp16")];
fp16 var_840_to_fp16 = const()[name = string("op_840_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_841_cast_fp16 = add(x = var_839_cast_fp16, y = var_840_to_fp16)[name = string("op_841_cast_fp16")];
fp32 norm_31_epsilon_0 = const()[name = string("norm_31_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_31_cast_fp16 = rsqrt(epsilon = norm_31_epsilon_0, x = var_841_cast_fp16)[name = string("norm_31_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_843_cast_fp16 = mul(x = x_97_cast_fp16, y = norm_31_cast_fp16)[name = string("op_843_cast_fp16")];
tensor<fp16, [1024]> layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53523456)))];
tensor<fp16, [1, 128, 1024]> var_844_cast_fp16 = mul(x = var_843_cast_fp16, y = layers_3_post_attention_layernorm_weight_to_fp16)[name = string("op_844_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53525568))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671360))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = var_844_cast_fp16)[name = string("linear_25_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_854_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_854_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671936))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59817728))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = var_844_cast_fp16)[name = string("linear_26_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_39_cast_fp16 = mul(x = var_854_cast_fp16, y = linear_26_cast_fp16)[name = string("input_39_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59818304))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964096))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("linear_27_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_27_cast_fp16)[name = string("x_103_cast_fp16")];
int32 var_875 = const()[name = string("op_875"), val = int32(-1)];
fp16 var_874_promoted_to_fp16 = const()[name = string("op_874_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_884_cast_fp16 = pow(x = x_103_cast_fp16, y = var_874_promoted_to_fp16)[name = string("op_884_cast_fp16")];
tensor<int32, [1]> var_886_axes_0 = const()[name = string("op_886_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_886_keep_dims_0 = const()[name = string("op_886_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_886_cast_fp16 = reduce_mean(axes = var_886_axes_0, keep_dims = var_886_keep_dims_0, x = var_884_cast_fp16)[name = string("op_886_cast_fp16")];
fp16 var_887_to_fp16 = const()[name = string("op_887_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_888_cast_fp16 = add(x = var_886_cast_fp16, y = var_887_to_fp16)[name = string("op_888_cast_fp16")];
fp32 norm_33_epsilon_0 = const()[name = string("norm_33_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_33_cast_fp16 = rsqrt(epsilon = norm_33_epsilon_0, x = var_888_cast_fp16)[name = string("norm_33_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_890_cast_fp16 = mul(x = x_103_cast_fp16, y = norm_33_cast_fp16)[name = string("op_890_cast_fp16")];
tensor<fp16, [1024]> layers_4_input_layernorm_weight_to_fp16 = const()[name = string("layers_4_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964672)))];
tensor<fp16, [1, 128, 1024]> var_891_cast_fp16 = mul(x = var_890_cast_fp16, y = layers_4_input_layernorm_weight_to_fp16)[name = string("op_891_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966784))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064000))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = var_891_cast_fp16)[name = string("linear_28_cast_fp16")];
tensor<int32, [4]> var_907 = const()[name = string("op_907"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_908_cast_fp16 = reshape(shape = var_907, x = linear_28_cast_fp16)[name = string("op_908_cast_fp16")];
tensor<int32, [4]> x_109_perm_0 = const()[name = string("x_109_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064576))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113216))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = var_891_cast_fp16)[name = string("linear_29_cast_fp16")];
tensor<int32, [4]> var_912 = const()[name = string("op_912"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_913_cast_fp16 = reshape(shape = var_912, x = linear_29_cast_fp16)[name = string("op_913_cast_fp16")];
tensor<int32, [4]> x_113_perm_0 = const()[name = string("x_113_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113792))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67162432))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = var_891_cast_fp16)[name = string("linear_30_cast_fp16")];
tensor<int32, [4]> var_917 = const()[name = string("op_917"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_918_cast_fp16 = reshape(shape = var_917, x = linear_30_cast_fp16)[name = string("op_918_cast_fp16")];
tensor<int32, [4]> transpose_60_perm_0 = const()[name = string("transpose_60_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_874_promoted_1_to_fp16 = const()[name = string("op_874_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_109_cast_fp16 = transpose(perm = x_109_perm_0, x = var_908_cast_fp16)[name = string("transpose_69")];
tensor<fp16, [1, 16, 128, 128]> var_922_cast_fp16 = pow(x = x_109_cast_fp16, y = var_874_promoted_1_to_fp16)[name = string("op_922_cast_fp16")];
tensor<int32, [1]> var_924_axes_0 = const()[name = string("op_924_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_924_keep_dims_0 = const()[name = string("op_924_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_924_cast_fp16 = reduce_mean(axes = var_924_axes_0, keep_dims = var_924_keep_dims_0, x = var_922_cast_fp16)[name = string("op_924_cast_fp16")];
fp16 var_925_to_fp16 = const()[name = string("op_925_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_926_cast_fp16 = add(x = var_924_cast_fp16, y = var_925_to_fp16)[name = string("op_926_cast_fp16")];
fp32 norm_35_epsilon_0 = const()[name = string("norm_35_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_35_cast_fp16 = rsqrt(epsilon = norm_35_epsilon_0, x = var_926_cast_fp16)[name = string("norm_35_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_928_cast_fp16 = mul(x = x_109_cast_fp16, y = norm_35_cast_fp16)[name = string("op_928_cast_fp16")];
tensor<fp16, [128]> layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163008)))];
tensor<fp16, [1, 16, 128, 128]> var_929_cast_fp16 = mul(x = var_928_cast_fp16, y = layers_4_self_attn_q_norm_weight_to_fp16)[name = string("op_929_cast_fp16")];
fp16 var_874_promoted_2_to_fp16 = const()[name = string("op_874_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_113_cast_fp16 = transpose(perm = x_113_perm_0, x = var_913_cast_fp16)[name = string("transpose_68")];
tensor<fp16, [1, 8, 128, 128]> var_933_cast_fp16 = pow(x = x_113_cast_fp16, y = var_874_promoted_2_to_fp16)[name = string("op_933_cast_fp16")];
tensor<int32, [1]> var_935_axes_0 = const()[name = string("op_935_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_935_keep_dims_0 = const()[name = string("op_935_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_935_cast_fp16 = reduce_mean(axes = var_935_axes_0, keep_dims = var_935_keep_dims_0, x = var_933_cast_fp16)[name = string("op_935_cast_fp16")];
fp16 var_936_to_fp16 = const()[name = string("op_936_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_937_cast_fp16 = add(x = var_935_cast_fp16, y = var_936_to_fp16)[name = string("op_937_cast_fp16")];
fp32 norm_37_epsilon_0 = const()[name = string("norm_37_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_37_cast_fp16 = rsqrt(epsilon = norm_37_epsilon_0, x = var_937_cast_fp16)[name = string("norm_37_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_939_cast_fp16 = mul(x = x_113_cast_fp16, y = norm_37_cast_fp16)[name = string("op_939_cast_fp16")];
tensor<fp16, [128]> layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163328)))];
tensor<fp16, [1, 8, 128, 128]> var_940_cast_fp16 = mul(x = var_939_cast_fp16, y = layers_4_self_attn_k_norm_weight_to_fp16)[name = string("op_940_cast_fp16")];
tensor<int32, [4]> x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_929_cast_fp16)[name = string("x1_17_cast_fp16")];
tensor<int32, [4]> x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_929_cast_fp16)[name = string("x2_17_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_961_cast_fp16 = mul(x = x1_17_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_961_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_962_cast_fp16 = mul(x = x2_17_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_962_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_963_cast_fp16 = sub(x = var_961_cast_fp16, y = var_962_cast_fp16)[name = string("op_963_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_964_cast_fp16 = mul(x = x2_17_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_964_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_965_cast_fp16 = mul(x = x1_17_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_965_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_966_cast_fp16 = add(x = var_964_cast_fp16, y = var_965_cast_fp16)[name = string("op_966_cast_fp16")];
bool q_9_interleave_0 = const()[name = string("q_9_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_9_cast_fp16 = concat(axis = var_875, interleave = q_9_interleave_0, values = (var_963_cast_fp16, var_966_cast_fp16))[name = string("q_9_cast_fp16")];
tensor<int32, [4]> x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_940_cast_fp16)[name = string("x1_19_cast_fp16")];
tensor<int32, [4]> x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_940_cast_fp16)[name = string("x2_19_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_988_cast_fp16 = mul(x = x1_19_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_988_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_989_cast_fp16 = mul(x = x2_19_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_989_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_990_cast_fp16 = sub(x = var_988_cast_fp16, y = var_989_cast_fp16)[name = string("op_990_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_991_cast_fp16 = mul(x = x2_19_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_991_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_992_cast_fp16 = mul(x = x1_19_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_992_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_993_cast_fp16 = add(x = var_991_cast_fp16, y = var_992_cast_fp16)[name = string("op_993_cast_fp16")];
bool var_995_interleave_0 = const()[name = string("op_995_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_995_cast_fp16 = concat(axis = var_875, interleave = var_995_interleave_0, values = (var_990_cast_fp16, var_993_cast_fp16))[name = string("op_995_cast_fp16")];
tensor<int32, [4]> transpose_17_perm_0 = const()[name = string("transpose_17_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_76 = const()[name = string("concat_76"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_17_cast_fp16 = transpose(perm = transpose_17_perm_0, x = var_995_cast_fp16)[name = string("transpose_67")];
tensor<fp16, [128, 1024]> reshape_25_cast_fp16 = reshape(shape = concat_76, x = transpose_17_cast_fp16)[name = string("reshape_25_cast_fp16")];
bool matmul_8_transpose_x_1 = const()[name = string("matmul_8_transpose_x_1"), val = bool(true)];
bool matmul_8_transpose_y_1 = const()[name = string("matmul_8_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_1, transpose_y = matmul_8_transpose_y_1, x = var_68_to_fp16, y = reshape_25_cast_fp16)[name = string("matmul_8_cast_fp16")];
tensor<int32, [4]> concat_79 = const()[name = string("concat_79"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_26_cast_fp16 = reshape(shape = concat_79, x = matmul_8_cast_fp16)[name = string("reshape_26_cast_fp16")];
tensor<int32, [4]> scattered_k_9_perm_0 = const()[name = string("scattered_k_9_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_60_cast_fp16 = transpose(perm = transpose_60_perm_0, x = var_918_cast_fp16)[name = string("transpose_66")];
tensor<fp16, [128, 1024]> reshape_28_cast_fp16 = reshape(shape = concat_84, x = transpose_60_cast_fp16)[name = string("reshape_28_cast_fp16")];
bool matmul_9_transpose_x_1 = const()[name = string("matmul_9_transpose_x_1"), val = bool(true)];
bool matmul_9_transpose_y_1 = const()[name = string("matmul_9_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_1, transpose_y = matmul_9_transpose_y_1, x = var_68_to_fp16, y = reshape_28_cast_fp16)[name = string("matmul_9_cast_fp16")];
tensor<int32, [4]> concat_87 = const()[name = string("concat_87"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_29_cast_fp16 = reshape(shape = concat_87, x = matmul_9_cast_fp16)[name = string("reshape_29_cast_fp16")];
tensor<int32, [4]> scattered_v_9_perm_0 = const()[name = string("scattered_v_9_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_8 = read_state(input = k_cache_4)[name = string("read_state_8")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_27_cast_fp16 = mul(x = read_state_8, y = var_224_cast_fp16)[name = string("k_cache_27_cast_fp16")];
write_state(data = k_cache_27_cast_fp16, input = k_cache_4)[name = string("coreml_update_state_72_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_72 = read_state(input = k_cache_4)[name = string("coreml_update_state_72")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_9_cast_fp16 = transpose(perm = scattered_k_9_perm_0, x = reshape_26_cast_fp16)[name = string("transpose_65")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_29_cast_fp16 = add(x = coreml_update_state_72, y = scattered_k_9_cast_fp16)[name = string("k_cache_29_cast_fp16")];
write_state(data = k_cache_29_cast_fp16, input = k_cache_4)[name = string("coreml_update_state_73_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_73 = read_state(input = k_cache_4)[name = string("coreml_update_state_73")];
tensor<fp16, [1, 8, 1024, 128]> read_state_9 = read_state(input = v_cache_4)[name = string("read_state_9")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_27_cast_fp16 = mul(x = read_state_9, y = var_224_cast_fp16)[name = string("v_cache_27_cast_fp16")];
write_state(data = v_cache_27_cast_fp16, input = v_cache_4)[name = string("coreml_update_state_74_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_74 = read_state(input = v_cache_4)[name = string("coreml_update_state_74")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_9_cast_fp16 = transpose(perm = scattered_v_9_perm_0, x = reshape_29_cast_fp16)[name = string("transpose_64")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_29_cast_fp16 = add(x = coreml_update_state_74, y = scattered_v_9_cast_fp16)[name = string("v_cache_29_cast_fp16")];
write_state(data = v_cache_29_cast_fp16, input = v_cache_4)[name = string("coreml_update_state_75_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_75 = read_state(input = v_cache_4)[name = string("coreml_update_state_75")];
tensor<int32, [1]> var_1006_axes_0 = const()[name = string("op_1006_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1006_cast_fp16 = expand_dims(axes = var_1006_axes_0, x = coreml_update_state_73)[name = string("op_1006_cast_fp16")];
tensor<int32, [5]> k_exp_17_reps_0 = const()[name = string("k_exp_17_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_17_cast_fp16 = tile(reps = k_exp_17_reps_0, x = var_1006_cast_fp16)[name = string("k_exp_17_cast_fp16")];
tensor<int32, [4]> var_1009 = const()[name = string("op_1009"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_19_cast_fp16 = reshape(shape = var_1009, x = k_exp_17_cast_fp16)[name = string("k_exp_19_cast_fp16")];
tensor<int32, [1]> var_1011_axes_0 = const()[name = string("op_1011_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1011_cast_fp16 = expand_dims(axes = var_1011_axes_0, x = coreml_update_state_75)[name = string("op_1011_cast_fp16")];
tensor<int32, [5]> v_exp_17_reps_0 = const()[name = string("v_exp_17_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_17_cast_fp16 = tile(reps = v_exp_17_reps_0, x = var_1011_cast_fp16)[name = string("v_exp_17_cast_fp16")];
tensor<int32, [4]> var_1014 = const()[name = string("op_1014"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_19_cast_fp16 = reshape(shape = var_1014, x = v_exp_17_cast_fp16)[name = string("v_exp_19_cast_fp16")];
bool var_1017_transpose_x_1 = const()[name = string("op_1017_transpose_x_1"), val = bool(false)];
bool var_1017_transpose_y_1 = const()[name = string("op_1017_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_1017_cast_fp16 = matmul(transpose_x = var_1017_transpose_x_1, transpose_y = var_1017_transpose_y_1, x = q_9_cast_fp16, y = k_exp_19_cast_fp16)[name = string("op_1017_cast_fp16")];
fp16 var_1018_to_fp16 = const()[name = string("op_1018_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_17_cast_fp16 = mul(x = var_1017_cast_fp16, y = var_1018_to_fp16)[name = string("attn_17_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_41_cast_fp16 = add(x = attn_17_cast_fp16, y = attention_mask_to_fp16)[name = string("input_41_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_19_cast_fp16 = softmax(axis = var_875, x = input_41_cast_fp16)[name = string("attn_19_cast_fp16")];
bool out_9_transpose_x_0 = const()[name = string("out_9_transpose_x_0"), val = bool(false)];
bool out_9_transpose_y_0 = const()[name = string("out_9_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_9_cast_fp16 = matmul(transpose_x = out_9_transpose_x_0, transpose_y = out_9_transpose_y_0, x = attn_19_cast_fp16, y = v_exp_19_cast_fp16)[name = string("out_9_cast_fp16")];
tensor<int32, [4]> var_1023_perm_0 = const()[name = string("op_1023_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1024 = const()[name = string("op_1024"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_1023_cast_fp16 = transpose(perm = var_1023_perm_0, x = out_9_cast_fp16)[name = string("transpose_63")];
tensor<fp16, [1, 128, 2048]> input_43_cast_fp16 = reshape(shape = var_1024, x = var_1023_cast_fp16)[name = string("input_43_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163648))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69260864))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("linear_31_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_123_cast_fp16 = add(x = x_103_cast_fp16, y = linear_31_cast_fp16)[name = string("x_123_cast_fp16")];
fp16 var_874_promoted_3_to_fp16 = const()[name = string("op_874_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1031_cast_fp16 = pow(x = x_123_cast_fp16, y = var_874_promoted_3_to_fp16)[name = string("op_1031_cast_fp16")];
tensor<int32, [1]> var_1033_axes_0 = const()[name = string("op_1033_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1033_keep_dims_0 = const()[name = string("op_1033_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1033_cast_fp16 = reduce_mean(axes = var_1033_axes_0, keep_dims = var_1033_keep_dims_0, x = var_1031_cast_fp16)[name = string("op_1033_cast_fp16")];
fp16 var_1034_to_fp16 = const()[name = string("op_1034_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1035_cast_fp16 = add(x = var_1033_cast_fp16, y = var_1034_to_fp16)[name = string("op_1035_cast_fp16")];
fp32 norm_39_epsilon_0 = const()[name = string("norm_39_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_39_cast_fp16 = rsqrt(epsilon = norm_39_epsilon_0, x = var_1035_cast_fp16)[name = string("norm_39_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1037_cast_fp16 = mul(x = x_123_cast_fp16, y = norm_39_cast_fp16)[name = string("op_1037_cast_fp16")];
tensor<fp16, [1024]> layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69261440)))];
tensor<fp16, [1, 128, 1024]> var_1038_cast_fp16 = mul(x = var_1037_cast_fp16, y = layers_4_post_attention_layernorm_weight_to_fp16)[name = string("op_1038_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69263552))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409344))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = var_1038_cast_fp16)[name = string("linear_32_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_1048_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1048_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409920))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75555712))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = var_1038_cast_fp16)[name = string("linear_33_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_49_cast_fp16 = mul(x = var_1048_cast_fp16, y = linear_33_cast_fp16)[name = string("input_49_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75556288))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702080))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("linear_34_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_129_cast_fp16 = add(x = x_123_cast_fp16, y = linear_34_cast_fp16)[name = string("x_129_cast_fp16")];
int32 var_1069 = const()[name = string("op_1069"), val = int32(-1)];
fp16 var_1068_promoted_to_fp16 = const()[name = string("op_1068_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1078_cast_fp16 = pow(x = x_129_cast_fp16, y = var_1068_promoted_to_fp16)[name = string("op_1078_cast_fp16")];
tensor<int32, [1]> var_1080_axes_0 = const()[name = string("op_1080_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1080_keep_dims_0 = const()[name = string("op_1080_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1080_cast_fp16 = reduce_mean(axes = var_1080_axes_0, keep_dims = var_1080_keep_dims_0, x = var_1078_cast_fp16)[name = string("op_1080_cast_fp16")];
fp16 var_1081_to_fp16 = const()[name = string("op_1081_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1082_cast_fp16 = add(x = var_1080_cast_fp16, y = var_1081_to_fp16)[name = string("op_1082_cast_fp16")];
fp32 norm_41_epsilon_0 = const()[name = string("norm_41_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_41_cast_fp16 = rsqrt(epsilon = norm_41_epsilon_0, x = var_1082_cast_fp16)[name = string("norm_41_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1084_cast_fp16 = mul(x = x_129_cast_fp16, y = norm_41_cast_fp16)[name = string("op_1084_cast_fp16")];
tensor<fp16, [1024]> layers_5_input_layernorm_weight_to_fp16 = const()[name = string("layers_5_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702656)))];
tensor<fp16, [1, 128, 1024]> var_1085_cast_fp16 = mul(x = var_1084_cast_fp16, y = layers_5_input_layernorm_weight_to_fp16)[name = string("op_1085_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704768))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80801984))))[name = string("layers_5_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = var_1085_cast_fp16)[name = string("linear_35_cast_fp16")];
tensor<int32, [4]> var_1101 = const()[name = string("op_1101"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_1102_cast_fp16 = reshape(shape = var_1101, x = linear_35_cast_fp16)[name = string("op_1102_cast_fp16")];
tensor<int32, [4]> x_135_perm_0 = const()[name = string("x_135_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80802560))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851200))))[name = string("layers_5_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = var_1085_cast_fp16)[name = string("linear_36_cast_fp16")];
tensor<int32, [4]> var_1106 = const()[name = string("op_1106"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1107_cast_fp16 = reshape(shape = var_1106, x = linear_36_cast_fp16)[name = string("op_1107_cast_fp16")];
tensor<int32, [4]> x_139_perm_0 = const()[name = string("x_139_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851776))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900416))))[name = string("layers_5_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = var_1085_cast_fp16)[name = string("linear_37_cast_fp16")];
tensor<int32, [4]> var_1111 = const()[name = string("op_1111"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1112_cast_fp16 = reshape(shape = var_1111, x = linear_37_cast_fp16)[name = string("op_1112_cast_fp16")];
tensor<int32, [4]> transpose_61_perm_0 = const()[name = string("transpose_61_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_1068_promoted_1_to_fp16 = const()[name = string("op_1068_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_135_cast_fp16 = transpose(perm = x_135_perm_0, x = var_1102_cast_fp16)[name = string("transpose_62")];
tensor<fp16, [1, 16, 128, 128]> var_1116_cast_fp16 = pow(x = x_135_cast_fp16, y = var_1068_promoted_1_to_fp16)[name = string("op_1116_cast_fp16")];
tensor<int32, [1]> var_1118_axes_0 = const()[name = string("op_1118_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1118_keep_dims_0 = const()[name = string("op_1118_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_1118_cast_fp16 = reduce_mean(axes = var_1118_axes_0, keep_dims = var_1118_keep_dims_0, x = var_1116_cast_fp16)[name = string("op_1118_cast_fp16")];
fp16 var_1119_to_fp16 = const()[name = string("op_1119_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_1120_cast_fp16 = add(x = var_1118_cast_fp16, y = var_1119_to_fp16)[name = string("op_1120_cast_fp16")];
fp32 norm_43_epsilon_0 = const()[name = string("norm_43_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_43_cast_fp16 = rsqrt(epsilon = norm_43_epsilon_0, x = var_1120_cast_fp16)[name = string("norm_43_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_1122_cast_fp16 = mul(x = x_135_cast_fp16, y = norm_43_cast_fp16)[name = string("op_1122_cast_fp16")];
tensor<fp16, [128]> layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900992)))];
tensor<fp16, [1, 16, 128, 128]> var_1123_cast_fp16 = mul(x = var_1122_cast_fp16, y = layers_5_self_attn_q_norm_weight_to_fp16)[name = string("op_1123_cast_fp16")];
fp16 var_1068_promoted_2_to_fp16 = const()[name = string("op_1068_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_139_cast_fp16 = transpose(perm = x_139_perm_0, x = var_1107_cast_fp16)[name = string("transpose_61")];
tensor<fp16, [1, 8, 128, 128]> var_1127_cast_fp16 = pow(x = x_139_cast_fp16, y = var_1068_promoted_2_to_fp16)[name = string("op_1127_cast_fp16")];
tensor<int32, [1]> var_1129_axes_0 = const()[name = string("op_1129_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1129_keep_dims_0 = const()[name = string("op_1129_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_1129_cast_fp16 = reduce_mean(axes = var_1129_axes_0, keep_dims = var_1129_keep_dims_0, x = var_1127_cast_fp16)[name = string("op_1129_cast_fp16")];
fp16 var_1130_to_fp16 = const()[name = string("op_1130_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_1131_cast_fp16 = add(x = var_1129_cast_fp16, y = var_1130_to_fp16)[name = string("op_1131_cast_fp16")];
fp32 norm_45_epsilon_0 = const()[name = string("norm_45_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_45_cast_fp16 = rsqrt(epsilon = norm_45_epsilon_0, x = var_1131_cast_fp16)[name = string("norm_45_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_1133_cast_fp16 = mul(x = x_139_cast_fp16, y = norm_45_cast_fp16)[name = string("op_1133_cast_fp16")];
tensor<fp16, [128]> layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901312)))];
tensor<fp16, [1, 8, 128, 128]> var_1134_cast_fp16 = mul(x = var_1133_cast_fp16, y = layers_5_self_attn_k_norm_weight_to_fp16)[name = string("op_1134_cast_fp16")];
tensor<int32, [4]> x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1123_cast_fp16)[name = string("x1_21_cast_fp16")];
tensor<int32, [4]> x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1123_cast_fp16)[name = string("x2_21_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1155_cast_fp16 = mul(x = x1_21_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1155_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1156_cast_fp16 = mul(x = x2_21_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1156_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1157_cast_fp16 = sub(x = var_1155_cast_fp16, y = var_1156_cast_fp16)[name = string("op_1157_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1158_cast_fp16 = mul(x = x2_21_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1158_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1159_cast_fp16 = mul(x = x1_21_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1159_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1160_cast_fp16 = add(x = var_1158_cast_fp16, y = var_1159_cast_fp16)[name = string("op_1160_cast_fp16")];
bool q_11_interleave_0 = const()[name = string("q_11_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_11_cast_fp16 = concat(axis = var_1069, interleave = q_11_interleave_0, values = (var_1157_cast_fp16, var_1160_cast_fp16))[name = string("q_11_cast_fp16")];
tensor<int32, [4]> x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1134_cast_fp16)[name = string("x1_23_cast_fp16")];
tensor<int32, [4]> x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1134_cast_fp16)[name = string("x2_23_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1182_cast_fp16 = mul(x = x1_23_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1182_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1183_cast_fp16 = mul(x = x2_23_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1183_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1184_cast_fp16 = sub(x = var_1182_cast_fp16, y = var_1183_cast_fp16)[name = string("op_1184_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1185_cast_fp16 = mul(x = x2_23_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1185_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1186_cast_fp16 = mul(x = x1_23_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1186_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1187_cast_fp16 = add(x = var_1185_cast_fp16, y = var_1186_cast_fp16)[name = string("op_1187_cast_fp16")];
bool var_1189_interleave_0 = const()[name = string("op_1189_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_1189_cast_fp16 = concat(axis = var_1069, interleave = var_1189_interleave_0, values = (var_1184_cast_fp16, var_1187_cast_fp16))[name = string("op_1189_cast_fp16")];
tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_94 = const()[name = string("concat_94"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_21_cast_fp16 = transpose(perm = transpose_21_perm_0, x = var_1189_cast_fp16)[name = string("transpose_60")];
tensor<fp16, [128, 1024]> reshape_31_cast_fp16 = reshape(shape = concat_94, x = transpose_21_cast_fp16)[name = string("reshape_31_cast_fp16")];
bool matmul_10_transpose_x_1 = const()[name = string("matmul_10_transpose_x_1"), val = bool(true)];
bool matmul_10_transpose_y_1 = const()[name = string("matmul_10_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_1, transpose_y = matmul_10_transpose_y_1, x = var_68_to_fp16, y = reshape_31_cast_fp16)[name = string("matmul_10_cast_fp16")];
tensor<int32, [4]> concat_97 = const()[name = string("concat_97"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_32_cast_fp16 = reshape(shape = concat_97, x = matmul_10_cast_fp16)[name = string("reshape_32_cast_fp16")];
tensor<int32, [4]> scattered_k_11_perm_0 = const()[name = string("scattered_k_11_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_102 = const()[name = string("concat_102"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_61_cast_fp16 = transpose(perm = transpose_61_perm_0, x = var_1112_cast_fp16)[name = string("transpose_59")];
tensor<fp16, [128, 1024]> reshape_34_cast_fp16 = reshape(shape = concat_102, x = transpose_61_cast_fp16)[name = string("reshape_34_cast_fp16")];
bool matmul_11_transpose_x_1 = const()[name = string("matmul_11_transpose_x_1"), val = bool(true)];
bool matmul_11_transpose_y_1 = const()[name = string("matmul_11_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_1, transpose_y = matmul_11_transpose_y_1, x = var_68_to_fp16, y = reshape_34_cast_fp16)[name = string("matmul_11_cast_fp16")];
tensor<int32, [4]> concat_105 = const()[name = string("concat_105"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_35_cast_fp16 = reshape(shape = concat_105, x = matmul_11_cast_fp16)[name = string("reshape_35_cast_fp16")];
tensor<int32, [4]> scattered_v_11_perm_0 = const()[name = string("scattered_v_11_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_10 = read_state(input = k_cache_5)[name = string("read_state_10")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_33_cast_fp16 = mul(x = read_state_10, y = var_224_cast_fp16)[name = string("k_cache_33_cast_fp16")];
write_state(data = k_cache_33_cast_fp16, input = k_cache_5)[name = string("coreml_update_state_76_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_76 = read_state(input = k_cache_5)[name = string("coreml_update_state_76")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_11_cast_fp16 = transpose(perm = scattered_k_11_perm_0, x = reshape_32_cast_fp16)[name = string("transpose_58")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_35_cast_fp16 = add(x = coreml_update_state_76, y = scattered_k_11_cast_fp16)[name = string("k_cache_35_cast_fp16")];
write_state(data = k_cache_35_cast_fp16, input = k_cache_5)[name = string("coreml_update_state_77_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_77 = read_state(input = k_cache_5)[name = string("coreml_update_state_77")];
tensor<fp16, [1, 8, 1024, 128]> read_state_11 = read_state(input = v_cache_5)[name = string("read_state_11")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_33_cast_fp16 = mul(x = read_state_11, y = var_224_cast_fp16)[name = string("v_cache_33_cast_fp16")];
write_state(data = v_cache_33_cast_fp16, input = v_cache_5)[name = string("coreml_update_state_78_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_78 = read_state(input = v_cache_5)[name = string("coreml_update_state_78")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_11_cast_fp16 = transpose(perm = scattered_v_11_perm_0, x = reshape_35_cast_fp16)[name = string("transpose_57")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_35_cast_fp16 = add(x = coreml_update_state_78, y = scattered_v_11_cast_fp16)[name = string("v_cache_35_cast_fp16")];
write_state(data = v_cache_35_cast_fp16, input = v_cache_5)[name = string("coreml_update_state_79_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_79 = read_state(input = v_cache_5)[name = string("coreml_update_state_79")];
tensor<int32, [1]> var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1200_cast_fp16 = expand_dims(axes = var_1200_axes_0, x = coreml_update_state_77)[name = string("op_1200_cast_fp16")];
tensor<int32, [5]> k_exp_21_reps_0 = const()[name = string("k_exp_21_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_21_cast_fp16 = tile(reps = k_exp_21_reps_0, x = var_1200_cast_fp16)[name = string("k_exp_21_cast_fp16")];
tensor<int32, [4]> var_1203 = const()[name = string("op_1203"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_23_cast_fp16 = reshape(shape = var_1203, x = k_exp_21_cast_fp16)[name = string("k_exp_23_cast_fp16")];
tensor<int32, [1]> var_1205_axes_0 = const()[name = string("op_1205_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1205_cast_fp16 = expand_dims(axes = var_1205_axes_0, x = coreml_update_state_79)[name = string("op_1205_cast_fp16")];
tensor<int32, [5]> v_exp_21_reps_0 = const()[name = string("v_exp_21_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_21_cast_fp16 = tile(reps = v_exp_21_reps_0, x = var_1205_cast_fp16)[name = string("v_exp_21_cast_fp16")];
tensor<int32, [4]> var_1208 = const()[name = string("op_1208"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_23_cast_fp16 = reshape(shape = var_1208, x = v_exp_21_cast_fp16)[name = string("v_exp_23_cast_fp16")];
bool var_1211_transpose_x_1 = const()[name = string("op_1211_transpose_x_1"), val = bool(false)];
bool var_1211_transpose_y_1 = const()[name = string("op_1211_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_1211_cast_fp16 = matmul(transpose_x = var_1211_transpose_x_1, transpose_y = var_1211_transpose_y_1, x = q_11_cast_fp16, y = k_exp_23_cast_fp16)[name = string("op_1211_cast_fp16")];
fp16 var_1212_to_fp16 = const()[name = string("op_1212_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_21_cast_fp16 = mul(x = var_1211_cast_fp16, y = var_1212_to_fp16)[name = string("attn_21_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_51_cast_fp16 = add(x = attn_21_cast_fp16, y = attention_mask_to_fp16)[name = string("input_51_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_23_cast_fp16 = softmax(axis = var_1069, x = input_51_cast_fp16)[name = string("attn_23_cast_fp16")];
bool out_11_transpose_x_0 = const()[name = string("out_11_transpose_x_0"), val = bool(false)];
bool out_11_transpose_y_0 = const()[name = string("out_11_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_11_cast_fp16 = matmul(transpose_x = out_11_transpose_x_0, transpose_y = out_11_transpose_y_0, x = attn_23_cast_fp16, y = v_exp_23_cast_fp16)[name = string("out_11_cast_fp16")];
tensor<int32, [4]> var_1217_perm_0 = const()[name = string("op_1217_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1218 = const()[name = string("op_1218"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_1217_cast_fp16 = transpose(perm = var_1217_perm_0, x = out_11_cast_fp16)[name = string("transpose_56")];
tensor<fp16, [1, 128, 2048]> input_53_cast_fp16 = reshape(shape = var_1218, x = var_1217_cast_fp16)[name = string("input_53_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901632))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84998848))))[name = string("layers_5_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("linear_38_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_149_cast_fp16 = add(x = x_129_cast_fp16, y = linear_38_cast_fp16)[name = string("x_149_cast_fp16")];
fp16 var_1068_promoted_3_to_fp16 = const()[name = string("op_1068_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1225_cast_fp16 = pow(x = x_149_cast_fp16, y = var_1068_promoted_3_to_fp16)[name = string("op_1225_cast_fp16")];
tensor<int32, [1]> var_1227_axes_0 = const()[name = string("op_1227_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1227_keep_dims_0 = const()[name = string("op_1227_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1227_cast_fp16 = reduce_mean(axes = var_1227_axes_0, keep_dims = var_1227_keep_dims_0, x = var_1225_cast_fp16)[name = string("op_1227_cast_fp16")];
fp16 var_1228_to_fp16 = const()[name = string("op_1228_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1229_cast_fp16 = add(x = var_1227_cast_fp16, y = var_1228_to_fp16)[name = string("op_1229_cast_fp16")];
fp32 norm_47_epsilon_0 = const()[name = string("norm_47_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_47_cast_fp16 = rsqrt(epsilon = norm_47_epsilon_0, x = var_1229_cast_fp16)[name = string("norm_47_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1231_cast_fp16 = mul(x = x_149_cast_fp16, y = norm_47_cast_fp16)[name = string("op_1231_cast_fp16")];
tensor<fp16, [1024]> layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84999424)))];
tensor<fp16, [1, 128, 1024]> var_1232_cast_fp16 = mul(x = var_1231_cast_fp16, y = layers_5_post_attention_layernorm_weight_to_fp16)[name = string("op_1232_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85001536))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147328))))[name = string("layers_5_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = var_1232_cast_fp16)[name = string("linear_39_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_1242_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1242_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147904))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91293696))))[name = string("layers_5_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_up_proj_weight_to_fp16_palettized, x = var_1232_cast_fp16)[name = string("linear_40_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_59_cast_fp16 = mul(x = var_1242_cast_fp16, y = linear_40_cast_fp16)[name = string("input_59_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91294272))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440064))))[name = string("layers_5_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("linear_41_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_155_cast_fp16 = add(x = x_149_cast_fp16, y = linear_41_cast_fp16)[name = string("x_155_cast_fp16")];
int32 var_1263 = const()[name = string("op_1263"), val = int32(-1)];
fp16 var_1262_promoted_to_fp16 = const()[name = string("op_1262_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1272_cast_fp16 = pow(x = x_155_cast_fp16, y = var_1262_promoted_to_fp16)[name = string("op_1272_cast_fp16")];
tensor<int32, [1]> var_1274_axes_0 = const()[name = string("op_1274_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1274_keep_dims_0 = const()[name = string("op_1274_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1274_cast_fp16 = reduce_mean(axes = var_1274_axes_0, keep_dims = var_1274_keep_dims_0, x = var_1272_cast_fp16)[name = string("op_1274_cast_fp16")];
fp16 var_1275_to_fp16 = const()[name = string("op_1275_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1276_cast_fp16 = add(x = var_1274_cast_fp16, y = var_1275_to_fp16)[name = string("op_1276_cast_fp16")];
fp32 norm_49_epsilon_0 = const()[name = string("norm_49_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_49_cast_fp16 = rsqrt(epsilon = norm_49_epsilon_0, x = var_1276_cast_fp16)[name = string("norm_49_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1278_cast_fp16 = mul(x = x_155_cast_fp16, y = norm_49_cast_fp16)[name = string("op_1278_cast_fp16")];
tensor<fp16, [1024]> layers_6_input_layernorm_weight_to_fp16 = const()[name = string("layers_6_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440640)))];
tensor<fp16, [1, 128, 1024]> var_1279_cast_fp16 = mul(x = var_1278_cast_fp16, y = layers_6_input_layernorm_weight_to_fp16)[name = string("op_1279_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94442752))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96539968))))[name = string("layers_6_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = var_1279_cast_fp16)[name = string("linear_42_cast_fp16")];
tensor<int32, [4]> var_1295 = const()[name = string("op_1295"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_1296_cast_fp16 = reshape(shape = var_1295, x = linear_42_cast_fp16)[name = string("op_1296_cast_fp16")];
tensor<int32, [4]> x_161_perm_0 = const()[name = string("x_161_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96540544))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589184))))[name = string("layers_6_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = var_1279_cast_fp16)[name = string("linear_43_cast_fp16")];
tensor<int32, [4]> var_1300 = const()[name = string("op_1300"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1301_cast_fp16 = reshape(shape = var_1300, x = linear_43_cast_fp16)[name = string("op_1301_cast_fp16")];
tensor<int32, [4]> x_165_perm_0 = const()[name = string("x_165_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589760))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638400))))[name = string("layers_6_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = var_1279_cast_fp16)[name = string("linear_44_cast_fp16")];
tensor<int32, [4]> var_1305 = const()[name = string("op_1305"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1306_cast_fp16 = reshape(shape = var_1305, x = linear_44_cast_fp16)[name = string("op_1306_cast_fp16")];
tensor<int32, [4]> transpose_62_perm_0 = const()[name = string("transpose_62_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_1262_promoted_1_to_fp16 = const()[name = string("op_1262_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_161_cast_fp16 = transpose(perm = x_161_perm_0, x = var_1296_cast_fp16)[name = string("transpose_55")];
tensor<fp16, [1, 16, 128, 128]> var_1310_cast_fp16 = pow(x = x_161_cast_fp16, y = var_1262_promoted_1_to_fp16)[name = string("op_1310_cast_fp16")];
tensor<int32, [1]> var_1312_axes_0 = const()[name = string("op_1312_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1312_keep_dims_0 = const()[name = string("op_1312_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_1312_cast_fp16 = reduce_mean(axes = var_1312_axes_0, keep_dims = var_1312_keep_dims_0, x = var_1310_cast_fp16)[name = string("op_1312_cast_fp16")];
fp16 var_1313_to_fp16 = const()[name = string("op_1313_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_1314_cast_fp16 = add(x = var_1312_cast_fp16, y = var_1313_to_fp16)[name = string("op_1314_cast_fp16")];
fp32 norm_51_epsilon_0 = const()[name = string("norm_51_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_51_cast_fp16 = rsqrt(epsilon = norm_51_epsilon_0, x = var_1314_cast_fp16)[name = string("norm_51_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_1316_cast_fp16 = mul(x = x_161_cast_fp16, y = norm_51_cast_fp16)[name = string("op_1316_cast_fp16")];
tensor<fp16, [128]> layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638976)))];
tensor<fp16, [1, 16, 128, 128]> var_1317_cast_fp16 = mul(x = var_1316_cast_fp16, y = layers_6_self_attn_q_norm_weight_to_fp16)[name = string("op_1317_cast_fp16")];
fp16 var_1262_promoted_2_to_fp16 = const()[name = string("op_1262_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_165_cast_fp16 = transpose(perm = x_165_perm_0, x = var_1301_cast_fp16)[name = string("transpose_54")];
tensor<fp16, [1, 8, 128, 128]> var_1321_cast_fp16 = pow(x = x_165_cast_fp16, y = var_1262_promoted_2_to_fp16)[name = string("op_1321_cast_fp16")];
tensor<int32, [1]> var_1323_axes_0 = const()[name = string("op_1323_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1323_keep_dims_0 = const()[name = string("op_1323_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_1323_cast_fp16 = reduce_mean(axes = var_1323_axes_0, keep_dims = var_1323_keep_dims_0, x = var_1321_cast_fp16)[name = string("op_1323_cast_fp16")];
fp16 var_1324_to_fp16 = const()[name = string("op_1324_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_1325_cast_fp16 = add(x = var_1323_cast_fp16, y = var_1324_to_fp16)[name = string("op_1325_cast_fp16")];
fp32 norm_53_epsilon_0 = const()[name = string("norm_53_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_53_cast_fp16 = rsqrt(epsilon = norm_53_epsilon_0, x = var_1325_cast_fp16)[name = string("norm_53_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_1327_cast_fp16 = mul(x = x_165_cast_fp16, y = norm_53_cast_fp16)[name = string("op_1327_cast_fp16")];
tensor<fp16, [128]> layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639296)))];
tensor<fp16, [1, 8, 128, 128]> var_1328_cast_fp16 = mul(x = var_1327_cast_fp16, y = layers_6_self_attn_k_norm_weight_to_fp16)[name = string("op_1328_cast_fp16")];
tensor<int32, [4]> x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1317_cast_fp16)[name = string("x1_25_cast_fp16")];
tensor<int32, [4]> x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1317_cast_fp16)[name = string("x2_25_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1349_cast_fp16 = mul(x = x1_25_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1349_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1350_cast_fp16 = mul(x = x2_25_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1350_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1351_cast_fp16 = sub(x = var_1349_cast_fp16, y = var_1350_cast_fp16)[name = string("op_1351_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1352_cast_fp16 = mul(x = x2_25_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1352_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1353_cast_fp16 = mul(x = x1_25_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1353_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1354_cast_fp16 = add(x = var_1352_cast_fp16, y = var_1353_cast_fp16)[name = string("op_1354_cast_fp16")];
bool q_13_interleave_0 = const()[name = string("q_13_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_13_cast_fp16 = concat(axis = var_1263, interleave = q_13_interleave_0, values = (var_1351_cast_fp16, var_1354_cast_fp16))[name = string("q_13_cast_fp16")];
tensor<int32, [4]> x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = var_1328_cast_fp16)[name = string("x1_27_cast_fp16")];
tensor<int32, [4]> x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = var_1328_cast_fp16)[name = string("x2_27_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1376_cast_fp16 = mul(x = x1_27_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1376_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1377_cast_fp16 = mul(x = x2_27_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1377_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1378_cast_fp16 = sub(x = var_1376_cast_fp16, y = var_1377_cast_fp16)[name = string("op_1378_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1379_cast_fp16 = mul(x = x2_27_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1379_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1380_cast_fp16 = mul(x = x1_27_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1380_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1381_cast_fp16 = add(x = var_1379_cast_fp16, y = var_1380_cast_fp16)[name = string("op_1381_cast_fp16")];
bool var_1383_interleave_0 = const()[name = string("op_1383_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_1383_cast_fp16 = concat(axis = var_1263, interleave = var_1383_interleave_0, values = (var_1378_cast_fp16, var_1381_cast_fp16))[name = string("op_1383_cast_fp16")];
tensor<int32, [4]> transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_112 = const()[name = string("concat_112"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_25_cast_fp16 = transpose(perm = transpose_25_perm_0, x = var_1383_cast_fp16)[name = string("transpose_53")];
tensor<fp16, [128, 1024]> reshape_37_cast_fp16 = reshape(shape = concat_112, x = transpose_25_cast_fp16)[name = string("reshape_37_cast_fp16")];
bool matmul_12_transpose_x_1 = const()[name = string("matmul_12_transpose_x_1"), val = bool(true)];
bool matmul_12_transpose_y_1 = const()[name = string("matmul_12_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_1, transpose_y = matmul_12_transpose_y_1, x = var_68_to_fp16, y = reshape_37_cast_fp16)[name = string("matmul_12_cast_fp16")];
tensor<int32, [4]> concat_115 = const()[name = string("concat_115"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_38_cast_fp16 = reshape(shape = concat_115, x = matmul_12_cast_fp16)[name = string("reshape_38_cast_fp16")];
tensor<int32, [4]> scattered_k_13_perm_0 = const()[name = string("scattered_k_13_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_120 = const()[name = string("concat_120"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_62_cast_fp16 = transpose(perm = transpose_62_perm_0, x = var_1306_cast_fp16)[name = string("transpose_52")];
tensor<fp16, [128, 1024]> reshape_40_cast_fp16 = reshape(shape = concat_120, x = transpose_62_cast_fp16)[name = string("reshape_40_cast_fp16")];
bool matmul_13_transpose_x_1 = const()[name = string("matmul_13_transpose_x_1"), val = bool(true)];
bool matmul_13_transpose_y_1 = const()[name = string("matmul_13_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_1, transpose_y = matmul_13_transpose_y_1, x = var_68_to_fp16, y = reshape_40_cast_fp16)[name = string("matmul_13_cast_fp16")];
tensor<int32, [4]> concat_123 = const()[name = string("concat_123"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_41_cast_fp16 = reshape(shape = concat_123, x = matmul_13_cast_fp16)[name = string("reshape_41_cast_fp16")];
tensor<int32, [4]> scattered_v_13_perm_0 = const()[name = string("scattered_v_13_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_12 = read_state(input = k_cache_6)[name = string("read_state_12")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_39_cast_fp16 = mul(x = read_state_12, y = var_224_cast_fp16)[name = string("k_cache_39_cast_fp16")];
write_state(data = k_cache_39_cast_fp16, input = k_cache_6)[name = string("coreml_update_state_80_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_80 = read_state(input = k_cache_6)[name = string("coreml_update_state_80")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_13_cast_fp16 = transpose(perm = scattered_k_13_perm_0, x = reshape_38_cast_fp16)[name = string("transpose_51")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_41_cast_fp16 = add(x = coreml_update_state_80, y = scattered_k_13_cast_fp16)[name = string("k_cache_41_cast_fp16")];
write_state(data = k_cache_41_cast_fp16, input = k_cache_6)[name = string("coreml_update_state_81_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_81 = read_state(input = k_cache_6)[name = string("coreml_update_state_81")];
tensor<fp16, [1, 8, 1024, 128]> read_state_13 = read_state(input = v_cache_6)[name = string("read_state_13")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_39_cast_fp16 = mul(x = read_state_13, y = var_224_cast_fp16)[name = string("v_cache_39_cast_fp16")];
write_state(data = v_cache_39_cast_fp16, input = v_cache_6)[name = string("coreml_update_state_82_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_82 = read_state(input = v_cache_6)[name = string("coreml_update_state_82")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_13_cast_fp16 = transpose(perm = scattered_v_13_perm_0, x = reshape_41_cast_fp16)[name = string("transpose_50")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_41_cast_fp16 = add(x = coreml_update_state_82, y = scattered_v_13_cast_fp16)[name = string("v_cache_41_cast_fp16")];
write_state(data = v_cache_41_cast_fp16, input = v_cache_6)[name = string("coreml_update_state_83_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_83 = read_state(input = v_cache_6)[name = string("coreml_update_state_83")];
tensor<int32, [1]> var_1394_axes_0 = const()[name = string("op_1394_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1394_cast_fp16 = expand_dims(axes = var_1394_axes_0, x = coreml_update_state_81)[name = string("op_1394_cast_fp16")];
tensor<int32, [5]> k_exp_25_reps_0 = const()[name = string("k_exp_25_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_25_cast_fp16 = tile(reps = k_exp_25_reps_0, x = var_1394_cast_fp16)[name = string("k_exp_25_cast_fp16")];
tensor<int32, [4]> var_1397 = const()[name = string("op_1397"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_27_cast_fp16 = reshape(shape = var_1397, x = k_exp_25_cast_fp16)[name = string("k_exp_27_cast_fp16")];
tensor<int32, [1]> var_1399_axes_0 = const()[name = string("op_1399_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1399_cast_fp16 = expand_dims(axes = var_1399_axes_0, x = coreml_update_state_83)[name = string("op_1399_cast_fp16")];
tensor<int32, [5]> v_exp_25_reps_0 = const()[name = string("v_exp_25_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_25_cast_fp16 = tile(reps = v_exp_25_reps_0, x = var_1399_cast_fp16)[name = string("v_exp_25_cast_fp16")];
tensor<int32, [4]> var_1402 = const()[name = string("op_1402"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_27_cast_fp16 = reshape(shape = var_1402, x = v_exp_25_cast_fp16)[name = string("v_exp_27_cast_fp16")];
bool var_1405_transpose_x_1 = const()[name = string("op_1405_transpose_x_1"), val = bool(false)];
bool var_1405_transpose_y_1 = const()[name = string("op_1405_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_1405_cast_fp16 = matmul(transpose_x = var_1405_transpose_x_1, transpose_y = var_1405_transpose_y_1, x = q_13_cast_fp16, y = k_exp_27_cast_fp16)[name = string("op_1405_cast_fp16")];
fp16 var_1406_to_fp16 = const()[name = string("op_1406_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_25_cast_fp16 = mul(x = var_1405_cast_fp16, y = var_1406_to_fp16)[name = string("attn_25_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_61_cast_fp16 = add(x = attn_25_cast_fp16, y = attention_mask_to_fp16)[name = string("input_61_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_27_cast_fp16 = softmax(axis = var_1263, x = input_61_cast_fp16)[name = string("attn_27_cast_fp16")];
bool out_13_transpose_x_0 = const()[name = string("out_13_transpose_x_0"), val = bool(false)];
bool out_13_transpose_y_0 = const()[name = string("out_13_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_13_cast_fp16 = matmul(transpose_x = out_13_transpose_x_0, transpose_y = out_13_transpose_y_0, x = attn_27_cast_fp16, y = v_exp_27_cast_fp16)[name = string("out_13_cast_fp16")];
tensor<int32, [4]> var_1411_perm_0 = const()[name = string("op_1411_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1412 = const()[name = string("op_1412"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_1411_cast_fp16 = transpose(perm = var_1411_perm_0, x = out_13_cast_fp16)[name = string("transpose_49")];
tensor<fp16, [1, 128, 2048]> input_63_cast_fp16 = reshape(shape = var_1412, x = var_1411_cast_fp16)[name = string("input_63_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639616))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100736832))))[name = string("layers_6_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("linear_45_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_175_cast_fp16 = add(x = x_155_cast_fp16, y = linear_45_cast_fp16)[name = string("x_175_cast_fp16")];
fp16 var_1262_promoted_3_to_fp16 = const()[name = string("op_1262_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1419_cast_fp16 = pow(x = x_175_cast_fp16, y = var_1262_promoted_3_to_fp16)[name = string("op_1419_cast_fp16")];
tensor<int32, [1]> var_1421_axes_0 = const()[name = string("op_1421_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1421_keep_dims_0 = const()[name = string("op_1421_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1421_cast_fp16 = reduce_mean(axes = var_1421_axes_0, keep_dims = var_1421_keep_dims_0, x = var_1419_cast_fp16)[name = string("op_1421_cast_fp16")];
fp16 var_1422_to_fp16 = const()[name = string("op_1422_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1423_cast_fp16 = add(x = var_1421_cast_fp16, y = var_1422_to_fp16)[name = string("op_1423_cast_fp16")];
fp32 norm_55_epsilon_0 = const()[name = string("norm_55_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_55_cast_fp16 = rsqrt(epsilon = norm_55_epsilon_0, x = var_1423_cast_fp16)[name = string("norm_55_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1425_cast_fp16 = mul(x = x_175_cast_fp16, y = norm_55_cast_fp16)[name = string("op_1425_cast_fp16")];
tensor<fp16, [1024]> layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100737408)))];
tensor<fp16, [1, 128, 1024]> var_1426_cast_fp16 = mul(x = var_1425_cast_fp16, y = layers_6_post_attention_layernorm_weight_to_fp16)[name = string("op_1426_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100739520))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885312))))[name = string("layers_6_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = var_1426_cast_fp16)[name = string("linear_46_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_1436_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1436_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885888))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107031680))))[name = string("layers_6_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_up_proj_weight_to_fp16_palettized, x = var_1426_cast_fp16)[name = string("linear_47_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_69_cast_fp16 = mul(x = var_1436_cast_fp16, y = linear_47_cast_fp16)[name = string("input_69_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107032256))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178048))))[name = string("layers_6_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("linear_48_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_181_cast_fp16 = add(x = x_175_cast_fp16, y = linear_48_cast_fp16)[name = string("x_181_cast_fp16")];
int32 var_1457 = const()[name = string("op_1457"), val = int32(-1)];
fp16 var_1456_promoted_to_fp16 = const()[name = string("op_1456_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1466_cast_fp16 = pow(x = x_181_cast_fp16, y = var_1456_promoted_to_fp16)[name = string("op_1466_cast_fp16")];
tensor<int32, [1]> var_1468_axes_0 = const()[name = string("op_1468_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1468_keep_dims_0 = const()[name = string("op_1468_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1468_cast_fp16 = reduce_mean(axes = var_1468_axes_0, keep_dims = var_1468_keep_dims_0, x = var_1466_cast_fp16)[name = string("op_1468_cast_fp16")];
fp16 var_1469_to_fp16 = const()[name = string("op_1469_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1470_cast_fp16 = add(x = var_1468_cast_fp16, y = var_1469_to_fp16)[name = string("op_1470_cast_fp16")];
fp32 norm_57_epsilon_0 = const()[name = string("norm_57_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_57_cast_fp16 = rsqrt(epsilon = norm_57_epsilon_0, x = var_1470_cast_fp16)[name = string("norm_57_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1472_cast_fp16 = mul(x = x_181_cast_fp16, y = norm_57_cast_fp16)[name = string("op_1472_cast_fp16")];
tensor<fp16, [1024]> layers_7_input_layernorm_weight_to_fp16 = const()[name = string("layers_7_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178624)))];
tensor<fp16, [1, 128, 1024]> var_1473_cast_fp16 = mul(x = var_1472_cast_fp16, y = layers_7_input_layernorm_weight_to_fp16)[name = string("op_1473_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180736))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112277952))))[name = string("layers_7_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = var_1473_cast_fp16)[name = string("linear_49_cast_fp16")];
tensor<int32, [4]> var_1489 = const()[name = string("op_1489"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_1490_cast_fp16 = reshape(shape = var_1489, x = linear_49_cast_fp16)[name = string("op_1490_cast_fp16")];
tensor<int32, [4]> x_187_perm_0 = const()[name = string("x_187_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112278528))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327168))))[name = string("layers_7_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = var_1473_cast_fp16)[name = string("linear_50_cast_fp16")];
tensor<int32, [4]> var_1494 = const()[name = string("op_1494"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1495_cast_fp16 = reshape(shape = var_1494, x = linear_50_cast_fp16)[name = string("op_1495_cast_fp16")];
tensor<int32, [4]> x_191_perm_0 = const()[name = string("x_191_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327744))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376384))))[name = string("layers_7_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = var_1473_cast_fp16)[name = string("linear_51_cast_fp16")];
tensor<int32, [4]> var_1499 = const()[name = string("op_1499"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1500_cast_fp16 = reshape(shape = var_1499, x = linear_51_cast_fp16)[name = string("op_1500_cast_fp16")];
tensor<int32, [4]> transpose_63_perm_0 = const()[name = string("transpose_63_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_1456_promoted_1_to_fp16 = const()[name = string("op_1456_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_187_cast_fp16 = transpose(perm = x_187_perm_0, x = var_1490_cast_fp16)[name = string("transpose_48")];
tensor<fp16, [1, 16, 128, 128]> var_1504_cast_fp16 = pow(x = x_187_cast_fp16, y = var_1456_promoted_1_to_fp16)[name = string("op_1504_cast_fp16")];
tensor<int32, [1]> var_1506_axes_0 = const()[name = string("op_1506_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1506_keep_dims_0 = const()[name = string("op_1506_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_1506_cast_fp16 = reduce_mean(axes = var_1506_axes_0, keep_dims = var_1506_keep_dims_0, x = var_1504_cast_fp16)[name = string("op_1506_cast_fp16")];
fp16 var_1507_to_fp16 = const()[name = string("op_1507_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_1508_cast_fp16 = add(x = var_1506_cast_fp16, y = var_1507_to_fp16)[name = string("op_1508_cast_fp16")];
fp32 norm_59_epsilon_0 = const()[name = string("norm_59_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_59_cast_fp16 = rsqrt(epsilon = norm_59_epsilon_0, x = var_1508_cast_fp16)[name = string("norm_59_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_1510_cast_fp16 = mul(x = x_187_cast_fp16, y = norm_59_cast_fp16)[name = string("op_1510_cast_fp16")];
tensor<fp16, [128]> layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376960)))];
tensor<fp16, [1, 16, 128, 128]> var_1511_cast_fp16 = mul(x = var_1510_cast_fp16, y = layers_7_self_attn_q_norm_weight_to_fp16)[name = string("op_1511_cast_fp16")];
fp16 var_1456_promoted_2_to_fp16 = const()[name = string("op_1456_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_191_cast_fp16 = transpose(perm = x_191_perm_0, x = var_1495_cast_fp16)[name = string("transpose_47")];
tensor<fp16, [1, 8, 128, 128]> var_1515_cast_fp16 = pow(x = x_191_cast_fp16, y = var_1456_promoted_2_to_fp16)[name = string("op_1515_cast_fp16")];
tensor<int32, [1]> var_1517_axes_0 = const()[name = string("op_1517_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1517_keep_dims_0 = const()[name = string("op_1517_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_1517_cast_fp16 = reduce_mean(axes = var_1517_axes_0, keep_dims = var_1517_keep_dims_0, x = var_1515_cast_fp16)[name = string("op_1517_cast_fp16")];
fp16 var_1518_to_fp16 = const()[name = string("op_1518_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_1519_cast_fp16 = add(x = var_1517_cast_fp16, y = var_1518_to_fp16)[name = string("op_1519_cast_fp16")];
fp32 norm_61_epsilon_0 = const()[name = string("norm_61_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_61_cast_fp16 = rsqrt(epsilon = norm_61_epsilon_0, x = var_1519_cast_fp16)[name = string("norm_61_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_1521_cast_fp16 = mul(x = x_191_cast_fp16, y = norm_61_cast_fp16)[name = string("op_1521_cast_fp16")];
tensor<fp16, [128]> layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377280)))];
tensor<fp16, [1, 8, 128, 128]> var_1522_cast_fp16 = mul(x = var_1521_cast_fp16, y = layers_7_self_attn_k_norm_weight_to_fp16)[name = string("op_1522_cast_fp16")];
tensor<int32, [4]> x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = var_1511_cast_fp16)[name = string("x1_29_cast_fp16")];
tensor<int32, [4]> x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = var_1511_cast_fp16)[name = string("x2_29_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1543_cast_fp16 = mul(x = x1_29_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1543_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1544_cast_fp16 = mul(x = x2_29_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1544_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1545_cast_fp16 = sub(x = var_1543_cast_fp16, y = var_1544_cast_fp16)[name = string("op_1545_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1546_cast_fp16 = mul(x = x2_29_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1546_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1547_cast_fp16 = mul(x = x1_29_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1547_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1548_cast_fp16 = add(x = var_1546_cast_fp16, y = var_1547_cast_fp16)[name = string("op_1548_cast_fp16")];
bool q_15_interleave_0 = const()[name = string("q_15_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_15_cast_fp16 = concat(axis = var_1457, interleave = q_15_interleave_0, values = (var_1545_cast_fp16, var_1548_cast_fp16))[name = string("q_15_cast_fp16")];
tensor<int32, [4]> x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = var_1522_cast_fp16)[name = string("x1_31_cast_fp16")];
tensor<int32, [4]> x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = var_1522_cast_fp16)[name = string("x2_31_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1570_cast_fp16 = mul(x = x1_31_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1570_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1571_cast_fp16 = mul(x = x2_31_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1571_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1572_cast_fp16 = sub(x = var_1570_cast_fp16, y = var_1571_cast_fp16)[name = string("op_1572_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1573_cast_fp16 = mul(x = x2_31_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1573_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1574_cast_fp16 = mul(x = x1_31_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1574_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1575_cast_fp16 = add(x = var_1573_cast_fp16, y = var_1574_cast_fp16)[name = string("op_1575_cast_fp16")];
bool var_1577_interleave_0 = const()[name = string("op_1577_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_1577_cast_fp16 = concat(axis = var_1457, interleave = var_1577_interleave_0, values = (var_1572_cast_fp16, var_1575_cast_fp16))[name = string("op_1577_cast_fp16")];
tensor<int32, [4]> transpose_29_perm_0 = const()[name = string("transpose_29_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_130 = const()[name = string("concat_130"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_29_cast_fp16 = transpose(perm = transpose_29_perm_0, x = var_1577_cast_fp16)[name = string("transpose_46")];
tensor<fp16, [128, 1024]> reshape_43_cast_fp16 = reshape(shape = concat_130, x = transpose_29_cast_fp16)[name = string("reshape_43_cast_fp16")];
bool matmul_14_transpose_x_1 = const()[name = string("matmul_14_transpose_x_1"), val = bool(true)];
bool matmul_14_transpose_y_1 = const()[name = string("matmul_14_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_1, transpose_y = matmul_14_transpose_y_1, x = var_68_to_fp16, y = reshape_43_cast_fp16)[name = string("matmul_14_cast_fp16")];
tensor<int32, [4]> concat_133 = const()[name = string("concat_133"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_44_cast_fp16 = reshape(shape = concat_133, x = matmul_14_cast_fp16)[name = string("reshape_44_cast_fp16")];
tensor<int32, [4]> scattered_k_15_perm_0 = const()[name = string("scattered_k_15_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_138 = const()[name = string("concat_138"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_63_cast_fp16 = transpose(perm = transpose_63_perm_0, x = var_1500_cast_fp16)[name = string("transpose_45")];
tensor<fp16, [128, 1024]> reshape_46_cast_fp16 = reshape(shape = concat_138, x = transpose_63_cast_fp16)[name = string("reshape_46_cast_fp16")];
bool matmul_15_transpose_x_1 = const()[name = string("matmul_15_transpose_x_1"), val = bool(true)];
bool matmul_15_transpose_y_1 = const()[name = string("matmul_15_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_1, transpose_y = matmul_15_transpose_y_1, x = var_68_to_fp16, y = reshape_46_cast_fp16)[name = string("matmul_15_cast_fp16")];
tensor<int32, [4]> concat_141 = const()[name = string("concat_141"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_47_cast_fp16 = reshape(shape = concat_141, x = matmul_15_cast_fp16)[name = string("reshape_47_cast_fp16")];
tensor<int32, [4]> scattered_v_15_perm_0 = const()[name = string("scattered_v_15_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_14 = read_state(input = k_cache_7)[name = string("read_state_14")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_45_cast_fp16 = mul(x = read_state_14, y = var_224_cast_fp16)[name = string("k_cache_45_cast_fp16")];
write_state(data = k_cache_45_cast_fp16, input = k_cache_7)[name = string("coreml_update_state_84_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_84 = read_state(input = k_cache_7)[name = string("coreml_update_state_84")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_15_cast_fp16 = transpose(perm = scattered_k_15_perm_0, x = reshape_44_cast_fp16)[name = string("transpose_44")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_47_cast_fp16 = add(x = coreml_update_state_84, y = scattered_k_15_cast_fp16)[name = string("k_cache_47_cast_fp16")];
write_state(data = k_cache_47_cast_fp16, input = k_cache_7)[name = string("coreml_update_state_85_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_85 = read_state(input = k_cache_7)[name = string("coreml_update_state_85")];
tensor<fp16, [1, 8, 1024, 128]> read_state_15 = read_state(input = v_cache_7)[name = string("read_state_15")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_45_cast_fp16 = mul(x = read_state_15, y = var_224_cast_fp16)[name = string("v_cache_45_cast_fp16")];
write_state(data = v_cache_45_cast_fp16, input = v_cache_7)[name = string("coreml_update_state_86_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_86 = read_state(input = v_cache_7)[name = string("coreml_update_state_86")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_15_cast_fp16 = transpose(perm = scattered_v_15_perm_0, x = reshape_47_cast_fp16)[name = string("transpose_43")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_47_cast_fp16 = add(x = coreml_update_state_86, y = scattered_v_15_cast_fp16)[name = string("v_cache_47_cast_fp16")];
write_state(data = v_cache_47_cast_fp16, input = v_cache_7)[name = string("coreml_update_state_87_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_87 = read_state(input = v_cache_7)[name = string("coreml_update_state_87")];
tensor<int32, [1]> var_1588_axes_0 = const()[name = string("op_1588_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1588_cast_fp16 = expand_dims(axes = var_1588_axes_0, x = coreml_update_state_85)[name = string("op_1588_cast_fp16")];
tensor<int32, [5]> k_exp_29_reps_0 = const()[name = string("k_exp_29_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_29_cast_fp16 = tile(reps = k_exp_29_reps_0, x = var_1588_cast_fp16)[name = string("k_exp_29_cast_fp16")];
tensor<int32, [4]> var_1591 = const()[name = string("op_1591"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_31_cast_fp16 = reshape(shape = var_1591, x = k_exp_29_cast_fp16)[name = string("k_exp_31_cast_fp16")];
tensor<int32, [1]> var_1593_axes_0 = const()[name = string("op_1593_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1593_cast_fp16 = expand_dims(axes = var_1593_axes_0, x = coreml_update_state_87)[name = string("op_1593_cast_fp16")];
tensor<int32, [5]> v_exp_29_reps_0 = const()[name = string("v_exp_29_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_29_cast_fp16 = tile(reps = v_exp_29_reps_0, x = var_1593_cast_fp16)[name = string("v_exp_29_cast_fp16")];
tensor<int32, [4]> var_1596 = const()[name = string("op_1596"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_31_cast_fp16 = reshape(shape = var_1596, x = v_exp_29_cast_fp16)[name = string("v_exp_31_cast_fp16")];
bool var_1599_transpose_x_1 = const()[name = string("op_1599_transpose_x_1"), val = bool(false)];
bool var_1599_transpose_y_1 = const()[name = string("op_1599_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_1599_cast_fp16 = matmul(transpose_x = var_1599_transpose_x_1, transpose_y = var_1599_transpose_y_1, x = q_15_cast_fp16, y = k_exp_31_cast_fp16)[name = string("op_1599_cast_fp16")];
fp16 var_1600_to_fp16 = const()[name = string("op_1600_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_29_cast_fp16 = mul(x = var_1599_cast_fp16, y = var_1600_to_fp16)[name = string("attn_29_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_71_cast_fp16 = add(x = attn_29_cast_fp16, y = attention_mask_to_fp16)[name = string("input_71_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_31_cast_fp16 = softmax(axis = var_1457, x = input_71_cast_fp16)[name = string("attn_31_cast_fp16")];
bool out_15_transpose_x_0 = const()[name = string("out_15_transpose_x_0"), val = bool(false)];
bool out_15_transpose_y_0 = const()[name = string("out_15_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_15_cast_fp16 = matmul(transpose_x = out_15_transpose_x_0, transpose_y = out_15_transpose_y_0, x = attn_31_cast_fp16, y = v_exp_31_cast_fp16)[name = string("out_15_cast_fp16")];
tensor<int32, [4]> var_1605_perm_0 = const()[name = string("op_1605_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1606 = const()[name = string("op_1606"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_1605_cast_fp16 = transpose(perm = var_1605_perm_0, x = out_15_cast_fp16)[name = string("transpose_42")];
tensor<fp16, [1, 128, 2048]> input_73_cast_fp16 = reshape(shape = var_1606, x = var_1605_cast_fp16)[name = string("input_73_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377600))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116474816))))[name = string("layers_7_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("linear_52_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_201_cast_fp16 = add(x = x_181_cast_fp16, y = linear_52_cast_fp16)[name = string("x_201_cast_fp16")];
fp16 var_1456_promoted_3_to_fp16 = const()[name = string("op_1456_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1613_cast_fp16 = pow(x = x_201_cast_fp16, y = var_1456_promoted_3_to_fp16)[name = string("op_1613_cast_fp16")];
tensor<int32, [1]> var_1615_axes_0 = const()[name = string("op_1615_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1615_keep_dims_0 = const()[name = string("op_1615_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1615_cast_fp16 = reduce_mean(axes = var_1615_axes_0, keep_dims = var_1615_keep_dims_0, x = var_1613_cast_fp16)[name = string("op_1615_cast_fp16")];
fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1617_cast_fp16 = add(x = var_1615_cast_fp16, y = var_1616_to_fp16)[name = string("op_1617_cast_fp16")];
fp32 norm_63_epsilon_0 = const()[name = string("norm_63_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_63_cast_fp16 = rsqrt(epsilon = norm_63_epsilon_0, x = var_1617_cast_fp16)[name = string("norm_63_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1619_cast_fp16 = mul(x = x_201_cast_fp16, y = norm_63_cast_fp16)[name = string("op_1619_cast_fp16")];
tensor<fp16, [1024]> layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116475392)))];
tensor<fp16, [1, 128, 1024]> var_1620_cast_fp16 = mul(x = var_1619_cast_fp16, y = layers_7_post_attention_layernorm_weight_to_fp16)[name = string("op_1620_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116477504))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623296))))[name = string("layers_7_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = var_1620_cast_fp16)[name = string("linear_53_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_1630_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1630_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623872))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122769664))))[name = string("layers_7_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_up_proj_weight_to_fp16_palettized, x = var_1620_cast_fp16)[name = string("linear_54_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_79_cast_fp16 = mul(x = var_1630_cast_fp16, y = linear_54_cast_fp16)[name = string("input_79_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122770240))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916032))))[name = string("layers_7_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("linear_55_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_207_cast_fp16 = add(x = x_201_cast_fp16, y = linear_55_cast_fp16)[name = string("x_207_cast_fp16")];
int32 var_1651 = const()[name = string("op_1651"), val = int32(-1)];
fp16 var_1650_promoted_to_fp16 = const()[name = string("op_1650_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1660_cast_fp16 = pow(x = x_207_cast_fp16, y = var_1650_promoted_to_fp16)[name = string("op_1660_cast_fp16")];
tensor<int32, [1]> var_1662_axes_0 = const()[name = string("op_1662_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1662_keep_dims_0 = const()[name = string("op_1662_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1662_cast_fp16 = reduce_mean(axes = var_1662_axes_0, keep_dims = var_1662_keep_dims_0, x = var_1660_cast_fp16)[name = string("op_1662_cast_fp16")];
fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1664_cast_fp16 = add(x = var_1662_cast_fp16, y = var_1663_to_fp16)[name = string("op_1664_cast_fp16")];
fp32 norm_65_epsilon_0 = const()[name = string("norm_65_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_65_cast_fp16 = rsqrt(epsilon = norm_65_epsilon_0, x = var_1664_cast_fp16)[name = string("norm_65_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1666_cast_fp16 = mul(x = x_207_cast_fp16, y = norm_65_cast_fp16)[name = string("op_1666_cast_fp16")];
tensor<fp16, [1024]> layers_8_input_layernorm_weight_to_fp16 = const()[name = string("layers_8_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916608)))];
tensor<fp16, [1, 128, 1024]> var_1667_cast_fp16 = mul(x = var_1666_cast_fp16, y = layers_8_input_layernorm_weight_to_fp16)[name = string("op_1667_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125918720))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128015936))))[name = string("layers_8_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = var_1667_cast_fp16)[name = string("linear_56_cast_fp16")];
tensor<int32, [4]> var_1683 = const()[name = string("op_1683"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_1684_cast_fp16 = reshape(shape = var_1683, x = linear_56_cast_fp16)[name = string("op_1684_cast_fp16")];
tensor<int32, [4]> x_213_perm_0 = const()[name = string("x_213_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128016512))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065152))))[name = string("layers_8_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = var_1667_cast_fp16)[name = string("linear_57_cast_fp16")];
tensor<int32, [4]> var_1688 = const()[name = string("op_1688"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1689_cast_fp16 = reshape(shape = var_1688, x = linear_57_cast_fp16)[name = string("op_1689_cast_fp16")];
tensor<int32, [4]> x_217_perm_0 = const()[name = string("x_217_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065728))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114368))))[name = string("layers_8_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = var_1667_cast_fp16)[name = string("linear_58_cast_fp16")];
tensor<int32, [4]> var_1693 = const()[name = string("op_1693"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1694_cast_fp16 = reshape(shape = var_1693, x = linear_58_cast_fp16)[name = string("op_1694_cast_fp16")];
tensor<int32, [4]> transpose_64_perm_0 = const()[name = string("transpose_64_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_1650_promoted_1_to_fp16 = const()[name = string("op_1650_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_213_cast_fp16 = transpose(perm = x_213_perm_0, x = var_1684_cast_fp16)[name = string("transpose_41")];
tensor<fp16, [1, 16, 128, 128]> var_1698_cast_fp16 = pow(x = x_213_cast_fp16, y = var_1650_promoted_1_to_fp16)[name = string("op_1698_cast_fp16")];
tensor<int32, [1]> var_1700_axes_0 = const()[name = string("op_1700_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1700_keep_dims_0 = const()[name = string("op_1700_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_1700_cast_fp16 = reduce_mean(axes = var_1700_axes_0, keep_dims = var_1700_keep_dims_0, x = var_1698_cast_fp16)[name = string("op_1700_cast_fp16")];
fp16 var_1701_to_fp16 = const()[name = string("op_1701_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_1702_cast_fp16 = add(x = var_1700_cast_fp16, y = var_1701_to_fp16)[name = string("op_1702_cast_fp16")];
fp32 norm_67_epsilon_0 = const()[name = string("norm_67_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_67_cast_fp16 = rsqrt(epsilon = norm_67_epsilon_0, x = var_1702_cast_fp16)[name = string("norm_67_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_1704_cast_fp16 = mul(x = x_213_cast_fp16, y = norm_67_cast_fp16)[name = string("op_1704_cast_fp16")];
tensor<fp16, [128]> layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114944)))];
tensor<fp16, [1, 16, 128, 128]> var_1705_cast_fp16 = mul(x = var_1704_cast_fp16, y = layers_8_self_attn_q_norm_weight_to_fp16)[name = string("op_1705_cast_fp16")];
fp16 var_1650_promoted_2_to_fp16 = const()[name = string("op_1650_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_217_cast_fp16 = transpose(perm = x_217_perm_0, x = var_1689_cast_fp16)[name = string("transpose_40")];
tensor<fp16, [1, 8, 128, 128]> var_1709_cast_fp16 = pow(x = x_217_cast_fp16, y = var_1650_promoted_2_to_fp16)[name = string("op_1709_cast_fp16")];
tensor<int32, [1]> var_1711_axes_0 = const()[name = string("op_1711_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1711_keep_dims_0 = const()[name = string("op_1711_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_1711_cast_fp16 = reduce_mean(axes = var_1711_axes_0, keep_dims = var_1711_keep_dims_0, x = var_1709_cast_fp16)[name = string("op_1711_cast_fp16")];
fp16 var_1712_to_fp16 = const()[name = string("op_1712_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_1713_cast_fp16 = add(x = var_1711_cast_fp16, y = var_1712_to_fp16)[name = string("op_1713_cast_fp16")];
fp32 norm_69_epsilon_0 = const()[name = string("norm_69_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_69_cast_fp16 = rsqrt(epsilon = norm_69_epsilon_0, x = var_1713_cast_fp16)[name = string("norm_69_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_1715_cast_fp16 = mul(x = x_217_cast_fp16, y = norm_69_cast_fp16)[name = string("op_1715_cast_fp16")];
tensor<fp16, [128]> layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115264)))];
tensor<fp16, [1, 8, 128, 128]> var_1716_cast_fp16 = mul(x = var_1715_cast_fp16, y = layers_8_self_attn_k_norm_weight_to_fp16)[name = string("op_1716_cast_fp16")];
tensor<int32, [4]> x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = var_1705_cast_fp16)[name = string("x1_33_cast_fp16")];
tensor<int32, [4]> x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = var_1705_cast_fp16)[name = string("x2_33_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1737_cast_fp16 = mul(x = x1_33_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1737_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1738_cast_fp16 = mul(x = x2_33_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1738_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1739_cast_fp16 = sub(x = var_1737_cast_fp16, y = var_1738_cast_fp16)[name = string("op_1739_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1740_cast_fp16 = mul(x = x2_33_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1740_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1741_cast_fp16 = mul(x = x1_33_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1741_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1742_cast_fp16 = add(x = var_1740_cast_fp16, y = var_1741_cast_fp16)[name = string("op_1742_cast_fp16")];
bool q_17_interleave_0 = const()[name = string("q_17_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_17_cast_fp16 = concat(axis = var_1651, interleave = q_17_interleave_0, values = (var_1739_cast_fp16, var_1742_cast_fp16))[name = string("q_17_cast_fp16")];
tensor<int32, [4]> x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = var_1716_cast_fp16)[name = string("x1_35_cast_fp16")];
tensor<int32, [4]> x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = var_1716_cast_fp16)[name = string("x2_35_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1764_cast_fp16 = mul(x = x1_35_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1764_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1765_cast_fp16 = mul(x = x2_35_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1765_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1766_cast_fp16 = sub(x = var_1764_cast_fp16, y = var_1765_cast_fp16)[name = string("op_1766_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1767_cast_fp16 = mul(x = x2_35_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1767_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1768_cast_fp16 = mul(x = x1_35_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1768_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1769_cast_fp16 = add(x = var_1767_cast_fp16, y = var_1768_cast_fp16)[name = string("op_1769_cast_fp16")];
bool var_1771_interleave_0 = const()[name = string("op_1771_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_1771_cast_fp16 = concat(axis = var_1651, interleave = var_1771_interleave_0, values = (var_1766_cast_fp16, var_1769_cast_fp16))[name = string("op_1771_cast_fp16")];
tensor<int32, [4]> transpose_33_perm_0 = const()[name = string("transpose_33_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_148 = const()[name = string("concat_148"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_33_cast_fp16 = transpose(perm = transpose_33_perm_0, x = var_1771_cast_fp16)[name = string("transpose_39")];
tensor<fp16, [128, 1024]> reshape_49_cast_fp16 = reshape(shape = concat_148, x = transpose_33_cast_fp16)[name = string("reshape_49_cast_fp16")];
bool matmul_16_transpose_x_1 = const()[name = string("matmul_16_transpose_x_1"), val = bool(true)];
bool matmul_16_transpose_y_1 = const()[name = string("matmul_16_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_1, transpose_y = matmul_16_transpose_y_1, x = var_68_to_fp16, y = reshape_49_cast_fp16)[name = string("matmul_16_cast_fp16")];
tensor<int32, [4]> concat_151 = const()[name = string("concat_151"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_50_cast_fp16 = reshape(shape = concat_151, x = matmul_16_cast_fp16)[name = string("reshape_50_cast_fp16")];
tensor<int32, [4]> scattered_k_17_perm_0 = const()[name = string("scattered_k_17_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_156 = const()[name = string("concat_156"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_64_cast_fp16 = transpose(perm = transpose_64_perm_0, x = var_1694_cast_fp16)[name = string("transpose_38")];
tensor<fp16, [128, 1024]> reshape_52_cast_fp16 = reshape(shape = concat_156, x = transpose_64_cast_fp16)[name = string("reshape_52_cast_fp16")];
bool matmul_17_transpose_x_1 = const()[name = string("matmul_17_transpose_x_1"), val = bool(true)];
bool matmul_17_transpose_y_1 = const()[name = string("matmul_17_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_1, transpose_y = matmul_17_transpose_y_1, x = var_68_to_fp16, y = reshape_52_cast_fp16)[name = string("matmul_17_cast_fp16")];
tensor<int32, [4]> concat_159 = const()[name = string("concat_159"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_53_cast_fp16 = reshape(shape = concat_159, x = matmul_17_cast_fp16)[name = string("reshape_53_cast_fp16")];
tensor<int32, [4]> scattered_v_17_perm_0 = const()[name = string("scattered_v_17_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_16 = read_state(input = k_cache_8)[name = string("read_state_16")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_51_cast_fp16 = mul(x = read_state_16, y = var_224_cast_fp16)[name = string("k_cache_51_cast_fp16")];
write_state(data = k_cache_51_cast_fp16, input = k_cache_8)[name = string("coreml_update_state_88_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_88 = read_state(input = k_cache_8)[name = string("coreml_update_state_88")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_17_cast_fp16 = transpose(perm = scattered_k_17_perm_0, x = reshape_50_cast_fp16)[name = string("transpose_37")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_53_cast_fp16 = add(x = coreml_update_state_88, y = scattered_k_17_cast_fp16)[name = string("k_cache_53_cast_fp16")];
write_state(data = k_cache_53_cast_fp16, input = k_cache_8)[name = string("coreml_update_state_89_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_89 = read_state(input = k_cache_8)[name = string("coreml_update_state_89")];
tensor<fp16, [1, 8, 1024, 128]> read_state_17 = read_state(input = v_cache_8)[name = string("read_state_17")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_51_cast_fp16 = mul(x = read_state_17, y = var_224_cast_fp16)[name = string("v_cache_51_cast_fp16")];
write_state(data = v_cache_51_cast_fp16, input = v_cache_8)[name = string("coreml_update_state_90_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_90 = read_state(input = v_cache_8)[name = string("coreml_update_state_90")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_17_cast_fp16 = transpose(perm = scattered_v_17_perm_0, x = reshape_53_cast_fp16)[name = string("transpose_36")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_53_cast_fp16 = add(x = coreml_update_state_90, y = scattered_v_17_cast_fp16)[name = string("v_cache_53_cast_fp16")];
write_state(data = v_cache_53_cast_fp16, input = v_cache_8)[name = string("coreml_update_state_91_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_91 = read_state(input = v_cache_8)[name = string("coreml_update_state_91")];
tensor<int32, [1]> var_1782_axes_0 = const()[name = string("op_1782_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1782_cast_fp16 = expand_dims(axes = var_1782_axes_0, x = coreml_update_state_89)[name = string("op_1782_cast_fp16")];
tensor<int32, [5]> k_exp_33_reps_0 = const()[name = string("k_exp_33_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_33_cast_fp16 = tile(reps = k_exp_33_reps_0, x = var_1782_cast_fp16)[name = string("k_exp_33_cast_fp16")];
tensor<int32, [4]> var_1785 = const()[name = string("op_1785"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_35_cast_fp16 = reshape(shape = var_1785, x = k_exp_33_cast_fp16)[name = string("k_exp_35_cast_fp16")];
tensor<int32, [1]> var_1787_axes_0 = const()[name = string("op_1787_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1787_cast_fp16 = expand_dims(axes = var_1787_axes_0, x = coreml_update_state_91)[name = string("op_1787_cast_fp16")];
tensor<int32, [5]> v_exp_33_reps_0 = const()[name = string("v_exp_33_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_33_cast_fp16 = tile(reps = v_exp_33_reps_0, x = var_1787_cast_fp16)[name = string("v_exp_33_cast_fp16")];
tensor<int32, [4]> var_1790 = const()[name = string("op_1790"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_35_cast_fp16 = reshape(shape = var_1790, x = v_exp_33_cast_fp16)[name = string("v_exp_35_cast_fp16")];
bool var_1793_transpose_x_1 = const()[name = string("op_1793_transpose_x_1"), val = bool(false)];
bool var_1793_transpose_y_1 = const()[name = string("op_1793_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_1793_cast_fp16 = matmul(transpose_x = var_1793_transpose_x_1, transpose_y = var_1793_transpose_y_1, x = q_17_cast_fp16, y = k_exp_35_cast_fp16)[name = string("op_1793_cast_fp16")];
fp16 var_1794_to_fp16 = const()[name = string("op_1794_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_33_cast_fp16 = mul(x = var_1793_cast_fp16, y = var_1794_to_fp16)[name = string("attn_33_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_81_cast_fp16 = add(x = attn_33_cast_fp16, y = attention_mask_to_fp16)[name = string("input_81_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_35_cast_fp16 = softmax(axis = var_1651, x = input_81_cast_fp16)[name = string("attn_35_cast_fp16")];
bool out_17_transpose_x_0 = const()[name = string("out_17_transpose_x_0"), val = bool(false)];
bool out_17_transpose_y_0 = const()[name = string("out_17_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_17_cast_fp16 = matmul(transpose_x = out_17_transpose_x_0, transpose_y = out_17_transpose_y_0, x = attn_35_cast_fp16, y = v_exp_35_cast_fp16)[name = string("out_17_cast_fp16")];
tensor<int32, [4]> var_1799_perm_0 = const()[name = string("op_1799_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1800 = const()[name = string("op_1800"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_1799_cast_fp16 = transpose(perm = var_1799_perm_0, x = out_17_cast_fp16)[name = string("transpose_35")];
tensor<fp16, [1, 128, 2048]> input_83_cast_fp16 = reshape(shape = var_1800, x = var_1799_cast_fp16)[name = string("input_83_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115584))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132212800))))[name = string("layers_8_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("linear_59_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_227_cast_fp16 = add(x = x_207_cast_fp16, y = linear_59_cast_fp16)[name = string("x_227_cast_fp16")];
fp16 var_1650_promoted_3_to_fp16 = const()[name = string("op_1650_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1807_cast_fp16 = pow(x = x_227_cast_fp16, y = var_1650_promoted_3_to_fp16)[name = string("op_1807_cast_fp16")];
tensor<int32, [1]> var_1809_axes_0 = const()[name = string("op_1809_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1809_keep_dims_0 = const()[name = string("op_1809_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1809_cast_fp16 = reduce_mean(axes = var_1809_axes_0, keep_dims = var_1809_keep_dims_0, x = var_1807_cast_fp16)[name = string("op_1809_cast_fp16")];
fp16 var_1810_to_fp16 = const()[name = string("op_1810_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1811_cast_fp16 = add(x = var_1809_cast_fp16, y = var_1810_to_fp16)[name = string("op_1811_cast_fp16")];
fp32 norm_71_epsilon_0 = const()[name = string("norm_71_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_71_cast_fp16 = rsqrt(epsilon = norm_71_epsilon_0, x = var_1811_cast_fp16)[name = string("norm_71_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1813_cast_fp16 = mul(x = x_227_cast_fp16, y = norm_71_cast_fp16)[name = string("op_1813_cast_fp16")];
tensor<fp16, [1024]> layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132213376)))];
tensor<fp16, [1, 128, 1024]> var_1814_cast_fp16 = mul(x = var_1813_cast_fp16, y = layers_8_post_attention_layernorm_weight_to_fp16)[name = string("op_1814_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132215488))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361280))))[name = string("layers_8_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = var_1814_cast_fp16)[name = string("linear_60_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_1824_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1824_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361856))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138507648))))[name = string("layers_8_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_up_proj_weight_to_fp16_palettized, x = var_1814_cast_fp16)[name = string("linear_61_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_89_cast_fp16 = mul(x = var_1824_cast_fp16, y = linear_61_cast_fp16)[name = string("input_89_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138508224))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654016))))[name = string("layers_8_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_62_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_233_cast_fp16 = add(x = x_227_cast_fp16, y = linear_62_cast_fp16)[name = string("x_233_cast_fp16")];
int32 var_1845 = const()[name = string("op_1845"), val = int32(-1)];
fp16 var_1844_promoted_to_fp16 = const()[name = string("op_1844_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_1854_cast_fp16 = pow(x = x_233_cast_fp16, y = var_1844_promoted_to_fp16)[name = string("op_1854_cast_fp16")];
tensor<int32, [1]> var_1856_axes_0 = const()[name = string("op_1856_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1856_keep_dims_0 = const()[name = string("op_1856_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_1856_cast_fp16 = reduce_mean(axes = var_1856_axes_0, keep_dims = var_1856_keep_dims_0, x = var_1854_cast_fp16)[name = string("op_1856_cast_fp16")];
fp16 var_1857_to_fp16 = const()[name = string("op_1857_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_1858_cast_fp16 = add(x = var_1856_cast_fp16, y = var_1857_to_fp16)[name = string("op_1858_cast_fp16")];
fp32 norm_73_epsilon_0 = const()[name = string("norm_73_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_73_cast_fp16 = rsqrt(epsilon = norm_73_epsilon_0, x = var_1858_cast_fp16)[name = string("norm_73_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_1860_cast_fp16 = mul(x = x_233_cast_fp16, y = norm_73_cast_fp16)[name = string("op_1860_cast_fp16")];
tensor<fp16, [1024]> layers_9_input_layernorm_weight_to_fp16 = const()[name = string("layers_9_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654592)))];
tensor<fp16, [1, 128, 1024]> var_1861_cast_fp16 = mul(x = var_1860_cast_fp16, y = layers_9_input_layernorm_weight_to_fp16)[name = string("op_1861_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141656704))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143753920))))[name = string("layers_9_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("linear_63_cast_fp16")];
tensor<int32, [4]> var_1877 = const()[name = string("op_1877"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_1878_cast_fp16 = reshape(shape = var_1877, x = linear_63_cast_fp16)[name = string("op_1878_cast_fp16")];
tensor<int32, [4]> x_239_perm_0 = const()[name = string("x_239_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143754496))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803136))))[name = string("layers_9_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("linear_64_cast_fp16")];
tensor<int32, [4]> var_1882 = const()[name = string("op_1882"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1883_cast_fp16 = reshape(shape = var_1882, x = linear_64_cast_fp16)[name = string("op_1883_cast_fp16")];
tensor<int32, [4]> x_243_perm_0 = const()[name = string("x_243_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803712))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852352))))[name = string("layers_9_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = var_1861_cast_fp16)[name = string("linear_65_cast_fp16")];
tensor<int32, [4]> var_1887 = const()[name = string("op_1887"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_1888_cast_fp16 = reshape(shape = var_1887, x = linear_65_cast_fp16)[name = string("op_1888_cast_fp16")];
tensor<int32, [4]> transpose_65_perm_0 = const()[name = string("transpose_65_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_1844_promoted_1_to_fp16 = const()[name = string("op_1844_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_239_cast_fp16 = transpose(perm = x_239_perm_0, x = var_1878_cast_fp16)[name = string("transpose_34")];
tensor<fp16, [1, 16, 128, 128]> var_1892_cast_fp16 = pow(x = x_239_cast_fp16, y = var_1844_promoted_1_to_fp16)[name = string("op_1892_cast_fp16")];
tensor<int32, [1]> var_1894_axes_0 = const()[name = string("op_1894_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1894_keep_dims_0 = const()[name = string("op_1894_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_1894_cast_fp16 = reduce_mean(axes = var_1894_axes_0, keep_dims = var_1894_keep_dims_0, x = var_1892_cast_fp16)[name = string("op_1894_cast_fp16")];
fp16 var_1895_to_fp16 = const()[name = string("op_1895_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_1896_cast_fp16 = add(x = var_1894_cast_fp16, y = var_1895_to_fp16)[name = string("op_1896_cast_fp16")];
fp32 norm_75_epsilon_0 = const()[name = string("norm_75_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_75_cast_fp16 = rsqrt(epsilon = norm_75_epsilon_0, x = var_1896_cast_fp16)[name = string("norm_75_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_1898_cast_fp16 = mul(x = x_239_cast_fp16, y = norm_75_cast_fp16)[name = string("op_1898_cast_fp16")];
tensor<fp16, [128]> layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852928)))];
tensor<fp16, [1, 16, 128, 128]> var_1899_cast_fp16 = mul(x = var_1898_cast_fp16, y = layers_9_self_attn_q_norm_weight_to_fp16)[name = string("op_1899_cast_fp16")];
fp16 var_1844_promoted_2_to_fp16 = const()[name = string("op_1844_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_243_cast_fp16 = transpose(perm = x_243_perm_0, x = var_1883_cast_fp16)[name = string("transpose_33")];
tensor<fp16, [1, 8, 128, 128]> var_1903_cast_fp16 = pow(x = x_243_cast_fp16, y = var_1844_promoted_2_to_fp16)[name = string("op_1903_cast_fp16")];
tensor<int32, [1]> var_1905_axes_0 = const()[name = string("op_1905_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1905_keep_dims_0 = const()[name = string("op_1905_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_1905_cast_fp16 = reduce_mean(axes = var_1905_axes_0, keep_dims = var_1905_keep_dims_0, x = var_1903_cast_fp16)[name = string("op_1905_cast_fp16")];
fp16 var_1906_to_fp16 = const()[name = string("op_1906_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_1907_cast_fp16 = add(x = var_1905_cast_fp16, y = var_1906_to_fp16)[name = string("op_1907_cast_fp16")];
fp32 norm_77_epsilon_0 = const()[name = string("norm_77_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_77_cast_fp16 = rsqrt(epsilon = norm_77_epsilon_0, x = var_1907_cast_fp16)[name = string("norm_77_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_1909_cast_fp16 = mul(x = x_243_cast_fp16, y = norm_77_cast_fp16)[name = string("op_1909_cast_fp16")];
tensor<fp16, [128]> layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853248)))];
tensor<fp16, [1, 8, 128, 128]> var_1910_cast_fp16 = mul(x = var_1909_cast_fp16, y = layers_9_self_attn_k_norm_weight_to_fp16)[name = string("op_1910_cast_fp16")];
tensor<int32, [4]> x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = var_1899_cast_fp16)[name = string("x1_37_cast_fp16")];
tensor<int32, [4]> x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = var_1899_cast_fp16)[name = string("x2_37_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1931_cast_fp16 = mul(x = x1_37_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1931_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1932_cast_fp16 = mul(x = x2_37_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1932_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1933_cast_fp16 = sub(x = var_1931_cast_fp16, y = var_1932_cast_fp16)[name = string("op_1933_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1934_cast_fp16 = mul(x = x2_37_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1934_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1935_cast_fp16 = mul(x = x1_37_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1935_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_1936_cast_fp16 = add(x = var_1934_cast_fp16, y = var_1935_cast_fp16)[name = string("op_1936_cast_fp16")];
bool q_19_interleave_0 = const()[name = string("q_19_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_19_cast_fp16 = concat(axis = var_1845, interleave = q_19_interleave_0, values = (var_1933_cast_fp16, var_1936_cast_fp16))[name = string("q_19_cast_fp16")];
tensor<int32, [4]> x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = var_1910_cast_fp16)[name = string("x1_39_cast_fp16")];
tensor<int32, [4]> x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = var_1910_cast_fp16)[name = string("x2_39_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1958_cast_fp16 = mul(x = x1_39_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1958_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1959_cast_fp16 = mul(x = x2_39_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1959_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1960_cast_fp16 = sub(x = var_1958_cast_fp16, y = var_1959_cast_fp16)[name = string("op_1960_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1961_cast_fp16 = mul(x = x2_39_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_1961_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1962_cast_fp16 = mul(x = x1_39_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_1962_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_1963_cast_fp16 = add(x = var_1961_cast_fp16, y = var_1962_cast_fp16)[name = string("op_1963_cast_fp16")];
bool var_1965_interleave_0 = const()[name = string("op_1965_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_1965_cast_fp16 = concat(axis = var_1845, interleave = var_1965_interleave_0, values = (var_1960_cast_fp16, var_1963_cast_fp16))[name = string("op_1965_cast_fp16")];
tensor<int32, [4]> transpose_37_perm_0 = const()[name = string("transpose_37_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_166 = const()[name = string("concat_166"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_37_cast_fp16 = transpose(perm = transpose_37_perm_0, x = var_1965_cast_fp16)[name = string("transpose_32")];
tensor<fp16, [128, 1024]> reshape_55_cast_fp16 = reshape(shape = concat_166, x = transpose_37_cast_fp16)[name = string("reshape_55_cast_fp16")];
bool matmul_18_transpose_x_1 = const()[name = string("matmul_18_transpose_x_1"), val = bool(true)];
bool matmul_18_transpose_y_1 = const()[name = string("matmul_18_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_1, transpose_y = matmul_18_transpose_y_1, x = var_68_to_fp16, y = reshape_55_cast_fp16)[name = string("matmul_18_cast_fp16")];
tensor<int32, [4]> concat_169 = const()[name = string("concat_169"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_56_cast_fp16 = reshape(shape = concat_169, x = matmul_18_cast_fp16)[name = string("reshape_56_cast_fp16")];
tensor<int32, [4]> scattered_k_19_perm_0 = const()[name = string("scattered_k_19_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_174 = const()[name = string("concat_174"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_65_cast_fp16 = transpose(perm = transpose_65_perm_0, x = var_1888_cast_fp16)[name = string("transpose_31")];
tensor<fp16, [128, 1024]> reshape_58_cast_fp16 = reshape(shape = concat_174, x = transpose_65_cast_fp16)[name = string("reshape_58_cast_fp16")];
bool matmul_19_transpose_x_1 = const()[name = string("matmul_19_transpose_x_1"), val = bool(true)];
bool matmul_19_transpose_y_1 = const()[name = string("matmul_19_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_1, transpose_y = matmul_19_transpose_y_1, x = var_68_to_fp16, y = reshape_58_cast_fp16)[name = string("matmul_19_cast_fp16")];
tensor<int32, [4]> concat_177 = const()[name = string("concat_177"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_59_cast_fp16 = reshape(shape = concat_177, x = matmul_19_cast_fp16)[name = string("reshape_59_cast_fp16")];
tensor<int32, [4]> scattered_v_19_perm_0 = const()[name = string("scattered_v_19_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_18 = read_state(input = k_cache_9)[name = string("read_state_18")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_57_cast_fp16 = mul(x = read_state_18, y = var_224_cast_fp16)[name = string("k_cache_57_cast_fp16")];
write_state(data = k_cache_57_cast_fp16, input = k_cache_9)[name = string("coreml_update_state_92_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_92 = read_state(input = k_cache_9)[name = string("coreml_update_state_92")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_19_cast_fp16 = transpose(perm = scattered_k_19_perm_0, x = reshape_56_cast_fp16)[name = string("transpose_30")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_59_cast_fp16 = add(x = coreml_update_state_92, y = scattered_k_19_cast_fp16)[name = string("k_cache_59_cast_fp16")];
write_state(data = k_cache_59_cast_fp16, input = k_cache_9)[name = string("coreml_update_state_93_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_93 = read_state(input = k_cache_9)[name = string("coreml_update_state_93")];
tensor<fp16, [1, 8, 1024, 128]> read_state_19 = read_state(input = v_cache_9)[name = string("read_state_19")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_57_cast_fp16 = mul(x = read_state_19, y = var_224_cast_fp16)[name = string("v_cache_57_cast_fp16")];
write_state(data = v_cache_57_cast_fp16, input = v_cache_9)[name = string("coreml_update_state_94_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_94 = read_state(input = v_cache_9)[name = string("coreml_update_state_94")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_19_cast_fp16 = transpose(perm = scattered_v_19_perm_0, x = reshape_59_cast_fp16)[name = string("transpose_29")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_59_cast_fp16 = add(x = coreml_update_state_94, y = scattered_v_19_cast_fp16)[name = string("v_cache_59_cast_fp16")];
write_state(data = v_cache_59_cast_fp16, input = v_cache_9)[name = string("coreml_update_state_95_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_95 = read_state(input = v_cache_9)[name = string("coreml_update_state_95")];
tensor<int32, [1]> var_1976_axes_0 = const()[name = string("op_1976_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1976_cast_fp16 = expand_dims(axes = var_1976_axes_0, x = coreml_update_state_93)[name = string("op_1976_cast_fp16")];
tensor<int32, [5]> k_exp_37_reps_0 = const()[name = string("k_exp_37_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_37_cast_fp16 = tile(reps = k_exp_37_reps_0, x = var_1976_cast_fp16)[name = string("k_exp_37_cast_fp16")];
tensor<int32, [4]> var_1979 = const()[name = string("op_1979"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_39_cast_fp16 = reshape(shape = var_1979, x = k_exp_37_cast_fp16)[name = string("k_exp_39_cast_fp16")];
tensor<int32, [1]> var_1981_axes_0 = const()[name = string("op_1981_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_1981_cast_fp16 = expand_dims(axes = var_1981_axes_0, x = coreml_update_state_95)[name = string("op_1981_cast_fp16")];
tensor<int32, [5]> v_exp_37_reps_0 = const()[name = string("v_exp_37_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_37_cast_fp16 = tile(reps = v_exp_37_reps_0, x = var_1981_cast_fp16)[name = string("v_exp_37_cast_fp16")];
tensor<int32, [4]> var_1984 = const()[name = string("op_1984"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_39_cast_fp16 = reshape(shape = var_1984, x = v_exp_37_cast_fp16)[name = string("v_exp_39_cast_fp16")];
bool var_1987_transpose_x_1 = const()[name = string("op_1987_transpose_x_1"), val = bool(false)];
bool var_1987_transpose_y_1 = const()[name = string("op_1987_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_1987_cast_fp16 = matmul(transpose_x = var_1987_transpose_x_1, transpose_y = var_1987_transpose_y_1, x = q_19_cast_fp16, y = k_exp_39_cast_fp16)[name = string("op_1987_cast_fp16")];
fp16 var_1988_to_fp16 = const()[name = string("op_1988_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_37_cast_fp16 = mul(x = var_1987_cast_fp16, y = var_1988_to_fp16)[name = string("attn_37_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_91_cast_fp16 = add(x = attn_37_cast_fp16, y = attention_mask_to_fp16)[name = string("input_91_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_39_cast_fp16 = softmax(axis = var_1845, x = input_91_cast_fp16)[name = string("attn_39_cast_fp16")];
bool out_19_transpose_x_0 = const()[name = string("out_19_transpose_x_0"), val = bool(false)];
bool out_19_transpose_y_0 = const()[name = string("out_19_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_19_cast_fp16 = matmul(transpose_x = out_19_transpose_x_0, transpose_y = out_19_transpose_y_0, x = attn_39_cast_fp16, y = v_exp_39_cast_fp16)[name = string("out_19_cast_fp16")];
tensor<int32, [4]> var_1993_perm_0 = const()[name = string("op_1993_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1994 = const()[name = string("op_1994"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_1993_cast_fp16 = transpose(perm = var_1993_perm_0, x = out_19_cast_fp16)[name = string("transpose_28")];
tensor<fp16, [1, 128, 2048]> input_93_cast_fp16 = reshape(shape = var_1994, x = var_1993_cast_fp16)[name = string("input_93_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853568))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147950784))))[name = string("layers_9_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("linear_66_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_253_cast_fp16 = add(x = x_233_cast_fp16, y = linear_66_cast_fp16)[name = string("x_253_cast_fp16")];
fp16 var_1844_promoted_3_to_fp16 = const()[name = string("op_1844_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2001_cast_fp16 = pow(x = x_253_cast_fp16, y = var_1844_promoted_3_to_fp16)[name = string("op_2001_cast_fp16")];
tensor<int32, [1]> var_2003_axes_0 = const()[name = string("op_2003_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2003_keep_dims_0 = const()[name = string("op_2003_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2003_cast_fp16 = reduce_mean(axes = var_2003_axes_0, keep_dims = var_2003_keep_dims_0, x = var_2001_cast_fp16)[name = string("op_2003_cast_fp16")];
fp16 var_2004_to_fp16 = const()[name = string("op_2004_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2005_cast_fp16 = add(x = var_2003_cast_fp16, y = var_2004_to_fp16)[name = string("op_2005_cast_fp16")];
fp32 norm_79_epsilon_0 = const()[name = string("norm_79_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_79_cast_fp16 = rsqrt(epsilon = norm_79_epsilon_0, x = var_2005_cast_fp16)[name = string("norm_79_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2007_cast_fp16 = mul(x = x_253_cast_fp16, y = norm_79_cast_fp16)[name = string("op_2007_cast_fp16")];
tensor<fp16, [1024]> layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147951360)))];
tensor<fp16, [1, 128, 1024]> var_2008_cast_fp16 = mul(x = var_2007_cast_fp16, y = layers_9_post_attention_layernorm_weight_to_fp16)[name = string("op_2008_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147953472))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099264))))[name = string("layers_9_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = var_2008_cast_fp16)[name = string("linear_67_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_2018_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_2018_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099840))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154245632))))[name = string("layers_9_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_up_proj_weight_to_fp16_palettized, x = var_2008_cast_fp16)[name = string("linear_68_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_99_cast_fp16 = mul(x = var_2018_cast_fp16, y = linear_68_cast_fp16)[name = string("input_99_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154246208))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392000))))[name = string("layers_9_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("linear_69_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_259_cast_fp16 = add(x = x_253_cast_fp16, y = linear_69_cast_fp16)[name = string("x_259_cast_fp16")];
int32 var_2039 = const()[name = string("op_2039"), val = int32(-1)];
fp16 var_2038_promoted_to_fp16 = const()[name = string("op_2038_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2048_cast_fp16 = pow(x = x_259_cast_fp16, y = var_2038_promoted_to_fp16)[name = string("op_2048_cast_fp16")];
tensor<int32, [1]> var_2050_axes_0 = const()[name = string("op_2050_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2050_keep_dims_0 = const()[name = string("op_2050_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2050_cast_fp16 = reduce_mean(axes = var_2050_axes_0, keep_dims = var_2050_keep_dims_0, x = var_2048_cast_fp16)[name = string("op_2050_cast_fp16")];
fp16 var_2051_to_fp16 = const()[name = string("op_2051_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2052_cast_fp16 = add(x = var_2050_cast_fp16, y = var_2051_to_fp16)[name = string("op_2052_cast_fp16")];
fp32 norm_81_epsilon_0 = const()[name = string("norm_81_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_81_cast_fp16 = rsqrt(epsilon = norm_81_epsilon_0, x = var_2052_cast_fp16)[name = string("norm_81_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2054_cast_fp16 = mul(x = x_259_cast_fp16, y = norm_81_cast_fp16)[name = string("op_2054_cast_fp16")];
tensor<fp16, [1024]> layers_10_input_layernorm_weight_to_fp16 = const()[name = string("layers_10_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392576)))];
tensor<fp16, [1, 128, 1024]> var_2055_cast_fp16 = mul(x = var_2054_cast_fp16, y = layers_10_input_layernorm_weight_to_fp16)[name = string("op_2055_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157394688))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159491904))))[name = string("layers_10_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = var_2055_cast_fp16)[name = string("linear_70_cast_fp16")];
tensor<int32, [4]> var_2071 = const()[name = string("op_2071"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_2072_cast_fp16 = reshape(shape = var_2071, x = linear_70_cast_fp16)[name = string("op_2072_cast_fp16")];
tensor<int32, [4]> x_265_perm_0 = const()[name = string("x_265_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159492480))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541120))))[name = string("layers_10_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = var_2055_cast_fp16)[name = string("linear_71_cast_fp16")];
tensor<int32, [4]> var_2076 = const()[name = string("op_2076"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2077_cast_fp16 = reshape(shape = var_2076, x = linear_71_cast_fp16)[name = string("op_2077_cast_fp16")];
tensor<int32, [4]> x_269_perm_0 = const()[name = string("x_269_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541696))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590336))))[name = string("layers_10_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = var_2055_cast_fp16)[name = string("linear_72_cast_fp16")];
tensor<int32, [4]> var_2081 = const()[name = string("op_2081"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2082_cast_fp16 = reshape(shape = var_2081, x = linear_72_cast_fp16)[name = string("op_2082_cast_fp16")];
tensor<int32, [4]> transpose_66_perm_0 = const()[name = string("transpose_66_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_2038_promoted_1_to_fp16 = const()[name = string("op_2038_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_265_cast_fp16 = transpose(perm = x_265_perm_0, x = var_2072_cast_fp16)[name = string("transpose_27")];
tensor<fp16, [1, 16, 128, 128]> var_2086_cast_fp16 = pow(x = x_265_cast_fp16, y = var_2038_promoted_1_to_fp16)[name = string("op_2086_cast_fp16")];
tensor<int32, [1]> var_2088_axes_0 = const()[name = string("op_2088_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2088_keep_dims_0 = const()[name = string("op_2088_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_2088_cast_fp16 = reduce_mean(axes = var_2088_axes_0, keep_dims = var_2088_keep_dims_0, x = var_2086_cast_fp16)[name = string("op_2088_cast_fp16")];
fp16 var_2089_to_fp16 = const()[name = string("op_2089_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_2090_cast_fp16 = add(x = var_2088_cast_fp16, y = var_2089_to_fp16)[name = string("op_2090_cast_fp16")];
fp32 norm_83_epsilon_0 = const()[name = string("norm_83_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_83_cast_fp16 = rsqrt(epsilon = norm_83_epsilon_0, x = var_2090_cast_fp16)[name = string("norm_83_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_2092_cast_fp16 = mul(x = x_265_cast_fp16, y = norm_83_cast_fp16)[name = string("op_2092_cast_fp16")];
tensor<fp16, [128]> layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590912)))];
tensor<fp16, [1, 16, 128, 128]> var_2093_cast_fp16 = mul(x = var_2092_cast_fp16, y = layers_10_self_attn_q_norm_weight_to_fp16)[name = string("op_2093_cast_fp16")];
fp16 var_2038_promoted_2_to_fp16 = const()[name = string("op_2038_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_269_cast_fp16 = transpose(perm = x_269_perm_0, x = var_2077_cast_fp16)[name = string("transpose_26")];
tensor<fp16, [1, 8, 128, 128]> var_2097_cast_fp16 = pow(x = x_269_cast_fp16, y = var_2038_promoted_2_to_fp16)[name = string("op_2097_cast_fp16")];
tensor<int32, [1]> var_2099_axes_0 = const()[name = string("op_2099_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2099_keep_dims_0 = const()[name = string("op_2099_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_2099_cast_fp16 = reduce_mean(axes = var_2099_axes_0, keep_dims = var_2099_keep_dims_0, x = var_2097_cast_fp16)[name = string("op_2099_cast_fp16")];
fp16 var_2100_to_fp16 = const()[name = string("op_2100_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_2101_cast_fp16 = add(x = var_2099_cast_fp16, y = var_2100_to_fp16)[name = string("op_2101_cast_fp16")];
fp32 norm_85_epsilon_0 = const()[name = string("norm_85_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_85_cast_fp16 = rsqrt(epsilon = norm_85_epsilon_0, x = var_2101_cast_fp16)[name = string("norm_85_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_2103_cast_fp16 = mul(x = x_269_cast_fp16, y = norm_85_cast_fp16)[name = string("op_2103_cast_fp16")];
tensor<fp16, [128]> layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591232)))];
tensor<fp16, [1, 8, 128, 128]> var_2104_cast_fp16 = mul(x = var_2103_cast_fp16, y = layers_10_self_attn_k_norm_weight_to_fp16)[name = string("op_2104_cast_fp16")];
tensor<int32, [4]> x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = var_2093_cast_fp16)[name = string("x1_41_cast_fp16")];
tensor<int32, [4]> x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = var_2093_cast_fp16)[name = string("x2_41_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2125_cast_fp16 = mul(x = x1_41_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2125_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2126_cast_fp16 = mul(x = x2_41_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2126_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2127_cast_fp16 = sub(x = var_2125_cast_fp16, y = var_2126_cast_fp16)[name = string("op_2127_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2128_cast_fp16 = mul(x = x2_41_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2128_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2129_cast_fp16 = mul(x = x1_41_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2129_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2130_cast_fp16 = add(x = var_2128_cast_fp16, y = var_2129_cast_fp16)[name = string("op_2130_cast_fp16")];
bool q_21_interleave_0 = const()[name = string("q_21_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_21_cast_fp16 = concat(axis = var_2039, interleave = q_21_interleave_0, values = (var_2127_cast_fp16, var_2130_cast_fp16))[name = string("q_21_cast_fp16")];
tensor<int32, [4]> x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = var_2104_cast_fp16)[name = string("x1_43_cast_fp16")];
tensor<int32, [4]> x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = var_2104_cast_fp16)[name = string("x2_43_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2152_cast_fp16 = mul(x = x1_43_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2152_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2153_cast_fp16 = mul(x = x2_43_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2153_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2154_cast_fp16 = sub(x = var_2152_cast_fp16, y = var_2153_cast_fp16)[name = string("op_2154_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2155_cast_fp16 = mul(x = x2_43_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2155_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2156_cast_fp16 = mul(x = x1_43_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2156_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2157_cast_fp16 = add(x = var_2155_cast_fp16, y = var_2156_cast_fp16)[name = string("op_2157_cast_fp16")];
bool var_2159_interleave_0 = const()[name = string("op_2159_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_2159_cast_fp16 = concat(axis = var_2039, interleave = var_2159_interleave_0, values = (var_2154_cast_fp16, var_2157_cast_fp16))[name = string("op_2159_cast_fp16")];
tensor<int32, [4]> transpose_41_perm_0 = const()[name = string("transpose_41_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_184 = const()[name = string("concat_184"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_41_cast_fp16 = transpose(perm = transpose_41_perm_0, x = var_2159_cast_fp16)[name = string("transpose_25")];
tensor<fp16, [128, 1024]> reshape_61_cast_fp16 = reshape(shape = concat_184, x = transpose_41_cast_fp16)[name = string("reshape_61_cast_fp16")];
bool matmul_20_transpose_x_1 = const()[name = string("matmul_20_transpose_x_1"), val = bool(true)];
bool matmul_20_transpose_y_1 = const()[name = string("matmul_20_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_1, transpose_y = matmul_20_transpose_y_1, x = var_68_to_fp16, y = reshape_61_cast_fp16)[name = string("matmul_20_cast_fp16")];
tensor<int32, [4]> concat_187 = const()[name = string("concat_187"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_62_cast_fp16 = reshape(shape = concat_187, x = matmul_20_cast_fp16)[name = string("reshape_62_cast_fp16")];
tensor<int32, [4]> scattered_k_21_perm_0 = const()[name = string("scattered_k_21_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_192 = const()[name = string("concat_192"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_66_cast_fp16 = transpose(perm = transpose_66_perm_0, x = var_2082_cast_fp16)[name = string("transpose_24")];
tensor<fp16, [128, 1024]> reshape_64_cast_fp16 = reshape(shape = concat_192, x = transpose_66_cast_fp16)[name = string("reshape_64_cast_fp16")];
bool matmul_21_transpose_x_1 = const()[name = string("matmul_21_transpose_x_1"), val = bool(true)];
bool matmul_21_transpose_y_1 = const()[name = string("matmul_21_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_1, transpose_y = matmul_21_transpose_y_1, x = var_68_to_fp16, y = reshape_64_cast_fp16)[name = string("matmul_21_cast_fp16")];
tensor<int32, [4]> concat_195 = const()[name = string("concat_195"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_65_cast_fp16 = reshape(shape = concat_195, x = matmul_21_cast_fp16)[name = string("reshape_65_cast_fp16")];
tensor<int32, [4]> scattered_v_21_perm_0 = const()[name = string("scattered_v_21_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_20 = read_state(input = k_cache_10)[name = string("read_state_20")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_63_cast_fp16 = mul(x = read_state_20, y = var_224_cast_fp16)[name = string("k_cache_63_cast_fp16")];
write_state(data = k_cache_63_cast_fp16, input = k_cache_10)[name = string("coreml_update_state_96_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_96 = read_state(input = k_cache_10)[name = string("coreml_update_state_96")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_21_cast_fp16 = transpose(perm = scattered_k_21_perm_0, x = reshape_62_cast_fp16)[name = string("transpose_23")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_65_cast_fp16 = add(x = coreml_update_state_96, y = scattered_k_21_cast_fp16)[name = string("k_cache_65_cast_fp16")];
write_state(data = k_cache_65_cast_fp16, input = k_cache_10)[name = string("coreml_update_state_97_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_97 = read_state(input = k_cache_10)[name = string("coreml_update_state_97")];
tensor<fp16, [1, 8, 1024, 128]> read_state_21 = read_state(input = v_cache_10)[name = string("read_state_21")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_63_cast_fp16 = mul(x = read_state_21, y = var_224_cast_fp16)[name = string("v_cache_63_cast_fp16")];
write_state(data = v_cache_63_cast_fp16, input = v_cache_10)[name = string("coreml_update_state_98_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_98 = read_state(input = v_cache_10)[name = string("coreml_update_state_98")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_21_cast_fp16 = transpose(perm = scattered_v_21_perm_0, x = reshape_65_cast_fp16)[name = string("transpose_22")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_65_cast_fp16 = add(x = coreml_update_state_98, y = scattered_v_21_cast_fp16)[name = string("v_cache_65_cast_fp16")];
write_state(data = v_cache_65_cast_fp16, input = v_cache_10)[name = string("coreml_update_state_99_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_99 = read_state(input = v_cache_10)[name = string("coreml_update_state_99")];
tensor<int32, [1]> var_2170_axes_0 = const()[name = string("op_2170_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2170_cast_fp16 = expand_dims(axes = var_2170_axes_0, x = coreml_update_state_97)[name = string("op_2170_cast_fp16")];
tensor<int32, [5]> k_exp_41_reps_0 = const()[name = string("k_exp_41_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_41_cast_fp16 = tile(reps = k_exp_41_reps_0, x = var_2170_cast_fp16)[name = string("k_exp_41_cast_fp16")];
tensor<int32, [4]> var_2173 = const()[name = string("op_2173"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_43_cast_fp16 = reshape(shape = var_2173, x = k_exp_41_cast_fp16)[name = string("k_exp_43_cast_fp16")];
tensor<int32, [1]> var_2175_axes_0 = const()[name = string("op_2175_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2175_cast_fp16 = expand_dims(axes = var_2175_axes_0, x = coreml_update_state_99)[name = string("op_2175_cast_fp16")];
tensor<int32, [5]> v_exp_41_reps_0 = const()[name = string("v_exp_41_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_41_cast_fp16 = tile(reps = v_exp_41_reps_0, x = var_2175_cast_fp16)[name = string("v_exp_41_cast_fp16")];
tensor<int32, [4]> var_2178 = const()[name = string("op_2178"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_43_cast_fp16 = reshape(shape = var_2178, x = v_exp_41_cast_fp16)[name = string("v_exp_43_cast_fp16")];
bool var_2181_transpose_x_1 = const()[name = string("op_2181_transpose_x_1"), val = bool(false)];
bool var_2181_transpose_y_1 = const()[name = string("op_2181_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_2181_cast_fp16 = matmul(transpose_x = var_2181_transpose_x_1, transpose_y = var_2181_transpose_y_1, x = q_21_cast_fp16, y = k_exp_43_cast_fp16)[name = string("op_2181_cast_fp16")];
fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_41_cast_fp16 = mul(x = var_2181_cast_fp16, y = var_2182_to_fp16)[name = string("attn_41_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_101_cast_fp16 = add(x = attn_41_cast_fp16, y = attention_mask_to_fp16)[name = string("input_101_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_43_cast_fp16 = softmax(axis = var_2039, x = input_101_cast_fp16)[name = string("attn_43_cast_fp16")];
bool out_21_transpose_x_0 = const()[name = string("out_21_transpose_x_0"), val = bool(false)];
bool out_21_transpose_y_0 = const()[name = string("out_21_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_21_cast_fp16 = matmul(transpose_x = out_21_transpose_x_0, transpose_y = out_21_transpose_y_0, x = attn_43_cast_fp16, y = v_exp_43_cast_fp16)[name = string("out_21_cast_fp16")];
tensor<int32, [4]> var_2187_perm_0 = const()[name = string("op_2187_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2188 = const()[name = string("op_2188"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_2187_cast_fp16 = transpose(perm = var_2187_perm_0, x = out_21_cast_fp16)[name = string("transpose_21")];
tensor<fp16, [1, 128, 2048]> input_103_cast_fp16 = reshape(shape = var_2188, x = var_2187_cast_fp16)[name = string("input_103_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591552))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163688768))))[name = string("layers_10_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_73_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_279_cast_fp16 = add(x = x_259_cast_fp16, y = linear_73_cast_fp16)[name = string("x_279_cast_fp16")];
fp16 var_2038_promoted_3_to_fp16 = const()[name = string("op_2038_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2195_cast_fp16 = pow(x = x_279_cast_fp16, y = var_2038_promoted_3_to_fp16)[name = string("op_2195_cast_fp16")];
tensor<int32, [1]> var_2197_axes_0 = const()[name = string("op_2197_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2197_keep_dims_0 = const()[name = string("op_2197_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2197_cast_fp16 = reduce_mean(axes = var_2197_axes_0, keep_dims = var_2197_keep_dims_0, x = var_2195_cast_fp16)[name = string("op_2197_cast_fp16")];
fp16 var_2198_to_fp16 = const()[name = string("op_2198_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2199_cast_fp16 = add(x = var_2197_cast_fp16, y = var_2198_to_fp16)[name = string("op_2199_cast_fp16")];
fp32 norm_87_epsilon_0 = const()[name = string("norm_87_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_87_cast_fp16 = rsqrt(epsilon = norm_87_epsilon_0, x = var_2199_cast_fp16)[name = string("norm_87_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2201_cast_fp16 = mul(x = x_279_cast_fp16, y = norm_87_cast_fp16)[name = string("op_2201_cast_fp16")];
tensor<fp16, [1024]> layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689344)))];
tensor<fp16, [1, 128, 1024]> var_2202_cast_fp16 = mul(x = var_2201_cast_fp16, y = layers_10_post_attention_layernorm_weight_to_fp16)[name = string("op_2202_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163691456))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837248))))[name = string("layers_10_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = var_2202_cast_fp16)[name = string("linear_74_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_2212_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2212_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837824))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169983616))))[name = string("layers_10_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_up_proj_weight_to_fp16_palettized, x = var_2202_cast_fp16)[name = string("linear_75_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_109_cast_fp16 = mul(x = var_2212_cast_fp16, y = linear_75_cast_fp16)[name = string("input_109_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169984192))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173129984))))[name = string("layers_10_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("linear_76_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_285_cast_fp16 = add(x = x_279_cast_fp16, y = linear_76_cast_fp16)[name = string("x_285_cast_fp16")];
int32 var_2233 = const()[name = string("op_2233"), val = int32(-1)];
fp16 var_2232_promoted_to_fp16 = const()[name = string("op_2232_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2242_cast_fp16 = pow(x = x_285_cast_fp16, y = var_2232_promoted_to_fp16)[name = string("op_2242_cast_fp16")];
tensor<int32, [1]> var_2244_axes_0 = const()[name = string("op_2244_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2244_keep_dims_0 = const()[name = string("op_2244_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2244_cast_fp16 = reduce_mean(axes = var_2244_axes_0, keep_dims = var_2244_keep_dims_0, x = var_2242_cast_fp16)[name = string("op_2244_cast_fp16")];
fp16 var_2245_to_fp16 = const()[name = string("op_2245_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2246_cast_fp16 = add(x = var_2244_cast_fp16, y = var_2245_to_fp16)[name = string("op_2246_cast_fp16")];
fp32 norm_89_epsilon_0 = const()[name = string("norm_89_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_89_cast_fp16 = rsqrt(epsilon = norm_89_epsilon_0, x = var_2246_cast_fp16)[name = string("norm_89_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2248_cast_fp16 = mul(x = x_285_cast_fp16, y = norm_89_cast_fp16)[name = string("op_2248_cast_fp16")];
tensor<fp16, [1024]> layers_11_input_layernorm_weight_to_fp16 = const()[name = string("layers_11_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173130560)))];
tensor<fp16, [1, 128, 1024]> var_2249_cast_fp16 = mul(x = var_2248_cast_fp16, y = layers_11_input_layernorm_weight_to_fp16)[name = string("op_2249_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173132672))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175229888))))[name = string("layers_11_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = var_2249_cast_fp16)[name = string("linear_77_cast_fp16")];
tensor<int32, [4]> var_2265 = const()[name = string("op_2265"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_2266_cast_fp16 = reshape(shape = var_2265, x = linear_77_cast_fp16)[name = string("op_2266_cast_fp16")];
tensor<int32, [4]> x_291_perm_0 = const()[name = string("x_291_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175230464))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279104))))[name = string("layers_11_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = var_2249_cast_fp16)[name = string("linear_78_cast_fp16")];
tensor<int32, [4]> var_2270 = const()[name = string("op_2270"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2271_cast_fp16 = reshape(shape = var_2270, x = linear_78_cast_fp16)[name = string("op_2271_cast_fp16")];
tensor<int32, [4]> x_295_perm_0 = const()[name = string("x_295_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279680))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328320))))[name = string("layers_11_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = var_2249_cast_fp16)[name = string("linear_79_cast_fp16")];
tensor<int32, [4]> var_2275 = const()[name = string("op_2275"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2276_cast_fp16 = reshape(shape = var_2275, x = linear_79_cast_fp16)[name = string("op_2276_cast_fp16")];
tensor<int32, [4]> transpose_67_perm_0 = const()[name = string("transpose_67_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_2232_promoted_1_to_fp16 = const()[name = string("op_2232_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_291_cast_fp16 = transpose(perm = x_291_perm_0, x = var_2266_cast_fp16)[name = string("transpose_20")];
tensor<fp16, [1, 16, 128, 128]> var_2280_cast_fp16 = pow(x = x_291_cast_fp16, y = var_2232_promoted_1_to_fp16)[name = string("op_2280_cast_fp16")];
tensor<int32, [1]> var_2282_axes_0 = const()[name = string("op_2282_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2282_keep_dims_0 = const()[name = string("op_2282_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_2282_cast_fp16 = reduce_mean(axes = var_2282_axes_0, keep_dims = var_2282_keep_dims_0, x = var_2280_cast_fp16)[name = string("op_2282_cast_fp16")];
fp16 var_2283_to_fp16 = const()[name = string("op_2283_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_2284_cast_fp16 = add(x = var_2282_cast_fp16, y = var_2283_to_fp16)[name = string("op_2284_cast_fp16")];
fp32 norm_91_epsilon_0 = const()[name = string("norm_91_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_91_cast_fp16 = rsqrt(epsilon = norm_91_epsilon_0, x = var_2284_cast_fp16)[name = string("norm_91_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_2286_cast_fp16 = mul(x = x_291_cast_fp16, y = norm_91_cast_fp16)[name = string("op_2286_cast_fp16")];
tensor<fp16, [128]> layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328896)))];
tensor<fp16, [1, 16, 128, 128]> var_2287_cast_fp16 = mul(x = var_2286_cast_fp16, y = layers_11_self_attn_q_norm_weight_to_fp16)[name = string("op_2287_cast_fp16")];
fp16 var_2232_promoted_2_to_fp16 = const()[name = string("op_2232_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_295_cast_fp16 = transpose(perm = x_295_perm_0, x = var_2271_cast_fp16)[name = string("transpose_19")];
tensor<fp16, [1, 8, 128, 128]> var_2291_cast_fp16 = pow(x = x_295_cast_fp16, y = var_2232_promoted_2_to_fp16)[name = string("op_2291_cast_fp16")];
tensor<int32, [1]> var_2293_axes_0 = const()[name = string("op_2293_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2293_keep_dims_0 = const()[name = string("op_2293_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_2293_cast_fp16 = reduce_mean(axes = var_2293_axes_0, keep_dims = var_2293_keep_dims_0, x = var_2291_cast_fp16)[name = string("op_2293_cast_fp16")];
fp16 var_2294_to_fp16 = const()[name = string("op_2294_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_2295_cast_fp16 = add(x = var_2293_cast_fp16, y = var_2294_to_fp16)[name = string("op_2295_cast_fp16")];
fp32 norm_93_epsilon_0 = const()[name = string("norm_93_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_93_cast_fp16 = rsqrt(epsilon = norm_93_epsilon_0, x = var_2295_cast_fp16)[name = string("norm_93_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_2297_cast_fp16 = mul(x = x_295_cast_fp16, y = norm_93_cast_fp16)[name = string("op_2297_cast_fp16")];
tensor<fp16, [128]> layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329216)))];
tensor<fp16, [1, 8, 128, 128]> var_2298_cast_fp16 = mul(x = var_2297_cast_fp16, y = layers_11_self_attn_k_norm_weight_to_fp16)[name = string("op_2298_cast_fp16")];
tensor<int32, [4]> x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = var_2287_cast_fp16)[name = string("x1_45_cast_fp16")];
tensor<int32, [4]> x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = var_2287_cast_fp16)[name = string("x2_45_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2319_cast_fp16 = mul(x = x1_45_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2319_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2320_cast_fp16 = mul(x = x2_45_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2320_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2321_cast_fp16 = sub(x = var_2319_cast_fp16, y = var_2320_cast_fp16)[name = string("op_2321_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2322_cast_fp16 = mul(x = x2_45_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2322_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2323_cast_fp16 = mul(x = x1_45_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2323_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2324_cast_fp16 = add(x = var_2322_cast_fp16, y = var_2323_cast_fp16)[name = string("op_2324_cast_fp16")];
bool q_23_interleave_0 = const()[name = string("q_23_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_23_cast_fp16 = concat(axis = var_2233, interleave = q_23_interleave_0, values = (var_2321_cast_fp16, var_2324_cast_fp16))[name = string("q_23_cast_fp16")];
tensor<int32, [4]> x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = var_2298_cast_fp16)[name = string("x1_47_cast_fp16")];
tensor<int32, [4]> x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = var_2298_cast_fp16)[name = string("x2_47_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2346_cast_fp16 = mul(x = x1_47_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2346_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2347_cast_fp16 = mul(x = x2_47_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2347_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2348_cast_fp16 = sub(x = var_2346_cast_fp16, y = var_2347_cast_fp16)[name = string("op_2348_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2349_cast_fp16 = mul(x = x2_47_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2349_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2350_cast_fp16 = mul(x = x1_47_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2350_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2351_cast_fp16 = add(x = var_2349_cast_fp16, y = var_2350_cast_fp16)[name = string("op_2351_cast_fp16")];
bool var_2353_interleave_0 = const()[name = string("op_2353_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_2353_cast_fp16 = concat(axis = var_2233, interleave = var_2353_interleave_0, values = (var_2348_cast_fp16, var_2351_cast_fp16))[name = string("op_2353_cast_fp16")];
tensor<int32, [4]> transpose_45_perm_0 = const()[name = string("transpose_45_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_202 = const()[name = string("concat_202"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_45_cast_fp16 = transpose(perm = transpose_45_perm_0, x = var_2353_cast_fp16)[name = string("transpose_18")];
tensor<fp16, [128, 1024]> reshape_67_cast_fp16 = reshape(shape = concat_202, x = transpose_45_cast_fp16)[name = string("reshape_67_cast_fp16")];
bool matmul_22_transpose_x_1 = const()[name = string("matmul_22_transpose_x_1"), val = bool(true)];
bool matmul_22_transpose_y_1 = const()[name = string("matmul_22_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_1, transpose_y = matmul_22_transpose_y_1, x = var_68_to_fp16, y = reshape_67_cast_fp16)[name = string("matmul_22_cast_fp16")];
tensor<int32, [4]> concat_205 = const()[name = string("concat_205"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_68_cast_fp16 = reshape(shape = concat_205, x = matmul_22_cast_fp16)[name = string("reshape_68_cast_fp16")];
tensor<int32, [4]> scattered_k_23_perm_0 = const()[name = string("scattered_k_23_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_210 = const()[name = string("concat_210"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_67_cast_fp16 = transpose(perm = transpose_67_perm_0, x = var_2276_cast_fp16)[name = string("transpose_17")];
tensor<fp16, [128, 1024]> reshape_70_cast_fp16 = reshape(shape = concat_210, x = transpose_67_cast_fp16)[name = string("reshape_70_cast_fp16")];
bool matmul_23_transpose_x_1 = const()[name = string("matmul_23_transpose_x_1"), val = bool(true)];
bool matmul_23_transpose_y_1 = const()[name = string("matmul_23_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_1, transpose_y = matmul_23_transpose_y_1, x = var_68_to_fp16, y = reshape_70_cast_fp16)[name = string("matmul_23_cast_fp16")];
tensor<int32, [4]> concat_213 = const()[name = string("concat_213"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_71_cast_fp16 = reshape(shape = concat_213, x = matmul_23_cast_fp16)[name = string("reshape_71_cast_fp16")];
tensor<int32, [4]> scattered_v_23_perm_0 = const()[name = string("scattered_v_23_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_22 = read_state(input = k_cache_11)[name = string("read_state_22")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_69_cast_fp16 = mul(x = read_state_22, y = var_224_cast_fp16)[name = string("k_cache_69_cast_fp16")];
write_state(data = k_cache_69_cast_fp16, input = k_cache_11)[name = string("coreml_update_state_100_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_100 = read_state(input = k_cache_11)[name = string("coreml_update_state_100")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_23_cast_fp16 = transpose(perm = scattered_k_23_perm_0, x = reshape_68_cast_fp16)[name = string("transpose_16")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_71_cast_fp16 = add(x = coreml_update_state_100, y = scattered_k_23_cast_fp16)[name = string("k_cache_71_cast_fp16")];
write_state(data = k_cache_71_cast_fp16, input = k_cache_11)[name = string("coreml_update_state_101_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_101 = read_state(input = k_cache_11)[name = string("coreml_update_state_101")];
tensor<fp16, [1, 8, 1024, 128]> read_state_23 = read_state(input = v_cache_11)[name = string("read_state_23")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_69_cast_fp16 = mul(x = read_state_23, y = var_224_cast_fp16)[name = string("v_cache_69_cast_fp16")];
write_state(data = v_cache_69_cast_fp16, input = v_cache_11)[name = string("coreml_update_state_102_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_102 = read_state(input = v_cache_11)[name = string("coreml_update_state_102")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_23_cast_fp16 = transpose(perm = scattered_v_23_perm_0, x = reshape_71_cast_fp16)[name = string("transpose_15")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_71_cast_fp16 = add(x = coreml_update_state_102, y = scattered_v_23_cast_fp16)[name = string("v_cache_71_cast_fp16")];
write_state(data = v_cache_71_cast_fp16, input = v_cache_11)[name = string("coreml_update_state_103_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_103 = read_state(input = v_cache_11)[name = string("coreml_update_state_103")];
tensor<int32, [1]> var_2364_axes_0 = const()[name = string("op_2364_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2364_cast_fp16 = expand_dims(axes = var_2364_axes_0, x = coreml_update_state_101)[name = string("op_2364_cast_fp16")];
tensor<int32, [5]> k_exp_45_reps_0 = const()[name = string("k_exp_45_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_45_cast_fp16 = tile(reps = k_exp_45_reps_0, x = var_2364_cast_fp16)[name = string("k_exp_45_cast_fp16")];
tensor<int32, [4]> var_2367 = const()[name = string("op_2367"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_47_cast_fp16 = reshape(shape = var_2367, x = k_exp_45_cast_fp16)[name = string("k_exp_47_cast_fp16")];
tensor<int32, [1]> var_2369_axes_0 = const()[name = string("op_2369_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2369_cast_fp16 = expand_dims(axes = var_2369_axes_0, x = coreml_update_state_103)[name = string("op_2369_cast_fp16")];
tensor<int32, [5]> v_exp_45_reps_0 = const()[name = string("v_exp_45_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_45_cast_fp16 = tile(reps = v_exp_45_reps_0, x = var_2369_cast_fp16)[name = string("v_exp_45_cast_fp16")];
tensor<int32, [4]> var_2372 = const()[name = string("op_2372"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_47_cast_fp16 = reshape(shape = var_2372, x = v_exp_45_cast_fp16)[name = string("v_exp_47_cast_fp16")];
bool var_2375_transpose_x_1 = const()[name = string("op_2375_transpose_x_1"), val = bool(false)];
bool var_2375_transpose_y_1 = const()[name = string("op_2375_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_2375_cast_fp16 = matmul(transpose_x = var_2375_transpose_x_1, transpose_y = var_2375_transpose_y_1, x = q_23_cast_fp16, y = k_exp_47_cast_fp16)[name = string("op_2375_cast_fp16")];
fp16 var_2376_to_fp16 = const()[name = string("op_2376_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_45_cast_fp16 = mul(x = var_2375_cast_fp16, y = var_2376_to_fp16)[name = string("attn_45_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_111_cast_fp16 = add(x = attn_45_cast_fp16, y = attention_mask_to_fp16)[name = string("input_111_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_47_cast_fp16 = softmax(axis = var_2233, x = input_111_cast_fp16)[name = string("attn_47_cast_fp16")];
bool out_23_transpose_x_0 = const()[name = string("out_23_transpose_x_0"), val = bool(false)];
bool out_23_transpose_y_0 = const()[name = string("out_23_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_23_cast_fp16 = matmul(transpose_x = out_23_transpose_x_0, transpose_y = out_23_transpose_y_0, x = attn_47_cast_fp16, y = v_exp_47_cast_fp16)[name = string("out_23_cast_fp16")];
tensor<int32, [4]> var_2381_perm_0 = const()[name = string("op_2381_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2382 = const()[name = string("op_2382"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_2381_cast_fp16 = transpose(perm = var_2381_perm_0, x = out_23_cast_fp16)[name = string("transpose_14")];
tensor<fp16, [1, 128, 2048]> input_113_cast_fp16 = reshape(shape = var_2382, x = var_2381_cast_fp16)[name = string("input_113_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329536))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179426752))))[name = string("layers_11_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("linear_80_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_305_cast_fp16 = add(x = x_285_cast_fp16, y = linear_80_cast_fp16)[name = string("x_305_cast_fp16")];
fp16 var_2232_promoted_3_to_fp16 = const()[name = string("op_2232_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2389_cast_fp16 = pow(x = x_305_cast_fp16, y = var_2232_promoted_3_to_fp16)[name = string("op_2389_cast_fp16")];
tensor<int32, [1]> var_2391_axes_0 = const()[name = string("op_2391_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2391_keep_dims_0 = const()[name = string("op_2391_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2391_cast_fp16 = reduce_mean(axes = var_2391_axes_0, keep_dims = var_2391_keep_dims_0, x = var_2389_cast_fp16)[name = string("op_2391_cast_fp16")];
fp16 var_2392_to_fp16 = const()[name = string("op_2392_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2393_cast_fp16 = add(x = var_2391_cast_fp16, y = var_2392_to_fp16)[name = string("op_2393_cast_fp16")];
fp32 norm_95_epsilon_0 = const()[name = string("norm_95_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_95_cast_fp16 = rsqrt(epsilon = norm_95_epsilon_0, x = var_2393_cast_fp16)[name = string("norm_95_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2395_cast_fp16 = mul(x = x_305_cast_fp16, y = norm_95_cast_fp16)[name = string("op_2395_cast_fp16")];
tensor<fp16, [1024]> layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427328)))];
tensor<fp16, [1, 128, 1024]> var_2396_cast_fp16 = mul(x = var_2395_cast_fp16, y = layers_11_post_attention_layernorm_weight_to_fp16)[name = string("op_2396_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179429440))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575232))))[name = string("layers_11_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = var_2396_cast_fp16)[name = string("linear_81_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_2406_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2406_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575808))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185721600))))[name = string("layers_11_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_up_proj_weight_to_fp16_palettized, x = var_2396_cast_fp16)[name = string("linear_82_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_119_cast_fp16 = mul(x = var_2406_cast_fp16, y = linear_82_cast_fp16)[name = string("input_119_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185722176))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188867968))))[name = string("layers_11_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("linear_83_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_311_cast_fp16 = add(x = x_305_cast_fp16, y = linear_83_cast_fp16)[name = string("x_311_cast_fp16")];
int32 var_2427 = const()[name = string("op_2427"), val = int32(-1)];
fp16 var_2426_promoted_to_fp16 = const()[name = string("op_2426_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2436_cast_fp16 = pow(x = x_311_cast_fp16, y = var_2426_promoted_to_fp16)[name = string("op_2436_cast_fp16")];
tensor<int32, [1]> var_2438_axes_0 = const()[name = string("op_2438_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2438_keep_dims_0 = const()[name = string("op_2438_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2438_cast_fp16 = reduce_mean(axes = var_2438_axes_0, keep_dims = var_2438_keep_dims_0, x = var_2436_cast_fp16)[name = string("op_2438_cast_fp16")];
fp16 var_2439_to_fp16 = const()[name = string("op_2439_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2440_cast_fp16 = add(x = var_2438_cast_fp16, y = var_2439_to_fp16)[name = string("op_2440_cast_fp16")];
fp32 norm_97_epsilon_0 = const()[name = string("norm_97_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_97_cast_fp16 = rsqrt(epsilon = norm_97_epsilon_0, x = var_2440_cast_fp16)[name = string("norm_97_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2442_cast_fp16 = mul(x = x_311_cast_fp16, y = norm_97_cast_fp16)[name = string("op_2442_cast_fp16")];
tensor<fp16, [1024]> layers_12_input_layernorm_weight_to_fp16 = const()[name = string("layers_12_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188868544)))];
tensor<fp16, [1, 128, 1024]> var_2443_cast_fp16 = mul(x = var_2442_cast_fp16, y = layers_12_input_layernorm_weight_to_fp16)[name = string("op_2443_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188870656))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190967872))))[name = string("layers_12_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = var_2443_cast_fp16)[name = string("linear_84_cast_fp16")];
tensor<int32, [4]> var_2459 = const()[name = string("op_2459"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_2460_cast_fp16 = reshape(shape = var_2459, x = linear_84_cast_fp16)[name = string("op_2460_cast_fp16")];
tensor<int32, [4]> x_317_perm_0 = const()[name = string("x_317_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190968448))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017088))))[name = string("layers_12_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = var_2443_cast_fp16)[name = string("linear_85_cast_fp16")];
tensor<int32, [4]> var_2464 = const()[name = string("op_2464"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2465_cast_fp16 = reshape(shape = var_2464, x = linear_85_cast_fp16)[name = string("op_2465_cast_fp16")];
tensor<int32, [4]> x_321_perm_0 = const()[name = string("x_321_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017664))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066304))))[name = string("layers_12_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = var_2443_cast_fp16)[name = string("linear_86_cast_fp16")];
tensor<int32, [4]> var_2469 = const()[name = string("op_2469"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2470_cast_fp16 = reshape(shape = var_2469, x = linear_86_cast_fp16)[name = string("op_2470_cast_fp16")];
tensor<int32, [4]> transpose_68_perm_0 = const()[name = string("transpose_68_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_2426_promoted_1_to_fp16 = const()[name = string("op_2426_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_317_cast_fp16 = transpose(perm = x_317_perm_0, x = var_2460_cast_fp16)[name = string("transpose_13")];
tensor<fp16, [1, 16, 128, 128]> var_2474_cast_fp16 = pow(x = x_317_cast_fp16, y = var_2426_promoted_1_to_fp16)[name = string("op_2474_cast_fp16")];
tensor<int32, [1]> var_2476_axes_0 = const()[name = string("op_2476_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2476_keep_dims_0 = const()[name = string("op_2476_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_2476_cast_fp16 = reduce_mean(axes = var_2476_axes_0, keep_dims = var_2476_keep_dims_0, x = var_2474_cast_fp16)[name = string("op_2476_cast_fp16")];
fp16 var_2477_to_fp16 = const()[name = string("op_2477_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_2478_cast_fp16 = add(x = var_2476_cast_fp16, y = var_2477_to_fp16)[name = string("op_2478_cast_fp16")];
fp32 norm_99_epsilon_0 = const()[name = string("norm_99_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_99_cast_fp16 = rsqrt(epsilon = norm_99_epsilon_0, x = var_2478_cast_fp16)[name = string("norm_99_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_2480_cast_fp16 = mul(x = x_317_cast_fp16, y = norm_99_cast_fp16)[name = string("op_2480_cast_fp16")];
tensor<fp16, [128]> layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066880)))];
tensor<fp16, [1, 16, 128, 128]> var_2481_cast_fp16 = mul(x = var_2480_cast_fp16, y = layers_12_self_attn_q_norm_weight_to_fp16)[name = string("op_2481_cast_fp16")];
fp16 var_2426_promoted_2_to_fp16 = const()[name = string("op_2426_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_321_cast_fp16 = transpose(perm = x_321_perm_0, x = var_2465_cast_fp16)[name = string("transpose_12")];
tensor<fp16, [1, 8, 128, 128]> var_2485_cast_fp16 = pow(x = x_321_cast_fp16, y = var_2426_promoted_2_to_fp16)[name = string("op_2485_cast_fp16")];
tensor<int32, [1]> var_2487_axes_0 = const()[name = string("op_2487_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2487_keep_dims_0 = const()[name = string("op_2487_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_2487_cast_fp16 = reduce_mean(axes = var_2487_axes_0, keep_dims = var_2487_keep_dims_0, x = var_2485_cast_fp16)[name = string("op_2487_cast_fp16")];
fp16 var_2488_to_fp16 = const()[name = string("op_2488_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_2489_cast_fp16 = add(x = var_2487_cast_fp16, y = var_2488_to_fp16)[name = string("op_2489_cast_fp16")];
fp32 norm_101_epsilon_0 = const()[name = string("norm_101_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_101_cast_fp16 = rsqrt(epsilon = norm_101_epsilon_0, x = var_2489_cast_fp16)[name = string("norm_101_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_2491_cast_fp16 = mul(x = x_321_cast_fp16, y = norm_101_cast_fp16)[name = string("op_2491_cast_fp16")];
tensor<fp16, [128]> layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067200)))];
tensor<fp16, [1, 8, 128, 128]> var_2492_cast_fp16 = mul(x = var_2491_cast_fp16, y = layers_12_self_attn_k_norm_weight_to_fp16)[name = string("op_2492_cast_fp16")];
tensor<int32, [4]> x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = var_2481_cast_fp16)[name = string("x1_49_cast_fp16")];
tensor<int32, [4]> x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = var_2481_cast_fp16)[name = string("x2_49_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2513_cast_fp16 = mul(x = x1_49_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2513_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2514_cast_fp16 = mul(x = x2_49_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2514_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2515_cast_fp16 = sub(x = var_2513_cast_fp16, y = var_2514_cast_fp16)[name = string("op_2515_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2516_cast_fp16 = mul(x = x2_49_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2516_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2517_cast_fp16 = mul(x = x1_49_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2517_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2518_cast_fp16 = add(x = var_2516_cast_fp16, y = var_2517_cast_fp16)[name = string("op_2518_cast_fp16")];
bool q_25_interleave_0 = const()[name = string("q_25_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_25_cast_fp16 = concat(axis = var_2427, interleave = q_25_interleave_0, values = (var_2515_cast_fp16, var_2518_cast_fp16))[name = string("q_25_cast_fp16")];
tensor<int32, [4]> x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = var_2492_cast_fp16)[name = string("x1_51_cast_fp16")];
tensor<int32, [4]> x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = var_2492_cast_fp16)[name = string("x2_51_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2540_cast_fp16 = mul(x = x1_51_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2540_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2541_cast_fp16 = mul(x = x2_51_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2541_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2542_cast_fp16 = sub(x = var_2540_cast_fp16, y = var_2541_cast_fp16)[name = string("op_2542_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2543_cast_fp16 = mul(x = x2_51_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2543_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2544_cast_fp16 = mul(x = x1_51_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2544_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2545_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2544_cast_fp16)[name = string("op_2545_cast_fp16")];
bool var_2547_interleave_0 = const()[name = string("op_2547_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_2547_cast_fp16 = concat(axis = var_2427, interleave = var_2547_interleave_0, values = (var_2542_cast_fp16, var_2545_cast_fp16))[name = string("op_2547_cast_fp16")];
tensor<int32, [4]> transpose_49_perm_0 = const()[name = string("transpose_49_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_220 = const()[name = string("concat_220"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_49_cast_fp16 = transpose(perm = transpose_49_perm_0, x = var_2547_cast_fp16)[name = string("transpose_11")];
tensor<fp16, [128, 1024]> reshape_73_cast_fp16 = reshape(shape = concat_220, x = transpose_49_cast_fp16)[name = string("reshape_73_cast_fp16")];
bool matmul_24_transpose_x_1 = const()[name = string("matmul_24_transpose_x_1"), val = bool(true)];
bool matmul_24_transpose_y_1 = const()[name = string("matmul_24_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_1, transpose_y = matmul_24_transpose_y_1, x = var_68_to_fp16, y = reshape_73_cast_fp16)[name = string("matmul_24_cast_fp16")];
tensor<int32, [4]> concat_223 = const()[name = string("concat_223"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_74_cast_fp16 = reshape(shape = concat_223, x = matmul_24_cast_fp16)[name = string("reshape_74_cast_fp16")];
tensor<int32, [4]> scattered_k_25_perm_0 = const()[name = string("scattered_k_25_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_228 = const()[name = string("concat_228"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_68_cast_fp16 = transpose(perm = transpose_68_perm_0, x = var_2470_cast_fp16)[name = string("transpose_10")];
tensor<fp16, [128, 1024]> reshape_76_cast_fp16 = reshape(shape = concat_228, x = transpose_68_cast_fp16)[name = string("reshape_76_cast_fp16")];
bool matmul_25_transpose_x_1 = const()[name = string("matmul_25_transpose_x_1"), val = bool(true)];
bool matmul_25_transpose_y_1 = const()[name = string("matmul_25_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_1, transpose_y = matmul_25_transpose_y_1, x = var_68_to_fp16, y = reshape_76_cast_fp16)[name = string("matmul_25_cast_fp16")];
tensor<int32, [4]> concat_231 = const()[name = string("concat_231"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_77_cast_fp16 = reshape(shape = concat_231, x = matmul_25_cast_fp16)[name = string("reshape_77_cast_fp16")];
tensor<int32, [4]> scattered_v_25_perm_0 = const()[name = string("scattered_v_25_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_24 = read_state(input = k_cache_12)[name = string("read_state_24")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_75_cast_fp16 = mul(x = read_state_24, y = var_224_cast_fp16)[name = string("k_cache_75_cast_fp16")];
write_state(data = k_cache_75_cast_fp16, input = k_cache_12)[name = string("coreml_update_state_104_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_104 = read_state(input = k_cache_12)[name = string("coreml_update_state_104")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_25_cast_fp16 = transpose(perm = scattered_k_25_perm_0, x = reshape_74_cast_fp16)[name = string("transpose_9")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_77_cast_fp16 = add(x = coreml_update_state_104, y = scattered_k_25_cast_fp16)[name = string("k_cache_77_cast_fp16")];
write_state(data = k_cache_77_cast_fp16, input = k_cache_12)[name = string("coreml_update_state_105_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_105 = read_state(input = k_cache_12)[name = string("coreml_update_state_105")];
tensor<fp16, [1, 8, 1024, 128]> read_state_25 = read_state(input = v_cache_12)[name = string("read_state_25")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_75_cast_fp16 = mul(x = read_state_25, y = var_224_cast_fp16)[name = string("v_cache_75_cast_fp16")];
write_state(data = v_cache_75_cast_fp16, input = v_cache_12)[name = string("coreml_update_state_106_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_106 = read_state(input = v_cache_12)[name = string("coreml_update_state_106")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_25_cast_fp16 = transpose(perm = scattered_v_25_perm_0, x = reshape_77_cast_fp16)[name = string("transpose_8")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_77_cast_fp16 = add(x = coreml_update_state_106, y = scattered_v_25_cast_fp16)[name = string("v_cache_77_cast_fp16")];
write_state(data = v_cache_77_cast_fp16, input = v_cache_12)[name = string("coreml_update_state_107_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_107 = read_state(input = v_cache_12)[name = string("coreml_update_state_107")];
tensor<int32, [1]> var_2558_axes_0 = const()[name = string("op_2558_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2558_cast_fp16 = expand_dims(axes = var_2558_axes_0, x = coreml_update_state_105)[name = string("op_2558_cast_fp16")];
tensor<int32, [5]> k_exp_49_reps_0 = const()[name = string("k_exp_49_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_49_cast_fp16 = tile(reps = k_exp_49_reps_0, x = var_2558_cast_fp16)[name = string("k_exp_49_cast_fp16")];
tensor<int32, [4]> var_2561 = const()[name = string("op_2561"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_51_cast_fp16 = reshape(shape = var_2561, x = k_exp_49_cast_fp16)[name = string("k_exp_51_cast_fp16")];
tensor<int32, [1]> var_2563_axes_0 = const()[name = string("op_2563_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2563_cast_fp16 = expand_dims(axes = var_2563_axes_0, x = coreml_update_state_107)[name = string("op_2563_cast_fp16")];
tensor<int32, [5]> v_exp_49_reps_0 = const()[name = string("v_exp_49_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_49_cast_fp16 = tile(reps = v_exp_49_reps_0, x = var_2563_cast_fp16)[name = string("v_exp_49_cast_fp16")];
tensor<int32, [4]> var_2566 = const()[name = string("op_2566"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_51_cast_fp16 = reshape(shape = var_2566, x = v_exp_49_cast_fp16)[name = string("v_exp_51_cast_fp16")];
bool var_2569_transpose_x_1 = const()[name = string("op_2569_transpose_x_1"), val = bool(false)];
bool var_2569_transpose_y_1 = const()[name = string("op_2569_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_2569_cast_fp16 = matmul(transpose_x = var_2569_transpose_x_1, transpose_y = var_2569_transpose_y_1, x = q_25_cast_fp16, y = k_exp_51_cast_fp16)[name = string("op_2569_cast_fp16")];
fp16 var_2570_to_fp16 = const()[name = string("op_2570_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_49_cast_fp16 = mul(x = var_2569_cast_fp16, y = var_2570_to_fp16)[name = string("attn_49_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_121_cast_fp16 = add(x = attn_49_cast_fp16, y = attention_mask_to_fp16)[name = string("input_121_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_51_cast_fp16 = softmax(axis = var_2427, x = input_121_cast_fp16)[name = string("attn_51_cast_fp16")];
bool out_25_transpose_x_0 = const()[name = string("out_25_transpose_x_0"), val = bool(false)];
bool out_25_transpose_y_0 = const()[name = string("out_25_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_25_cast_fp16 = matmul(transpose_x = out_25_transpose_x_0, transpose_y = out_25_transpose_y_0, x = attn_51_cast_fp16, y = v_exp_51_cast_fp16)[name = string("out_25_cast_fp16")];
tensor<int32, [4]> var_2575_perm_0 = const()[name = string("op_2575_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2576 = const()[name = string("op_2576"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_2575_cast_fp16 = transpose(perm = var_2575_perm_0, x = out_25_cast_fp16)[name = string("transpose_7")];
tensor<fp16, [1, 128, 2048]> input_123_cast_fp16 = reshape(shape = var_2576, x = var_2575_cast_fp16)[name = string("input_123_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067520))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195164736))))[name = string("layers_12_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("linear_87_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_331_cast_fp16 = add(x = x_311_cast_fp16, y = linear_87_cast_fp16)[name = string("x_331_cast_fp16")];
fp16 var_2426_promoted_3_to_fp16 = const()[name = string("op_2426_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2583_cast_fp16 = pow(x = x_331_cast_fp16, y = var_2426_promoted_3_to_fp16)[name = string("op_2583_cast_fp16")];
tensor<int32, [1]> var_2585_axes_0 = const()[name = string("op_2585_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2585_keep_dims_0 = const()[name = string("op_2585_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2585_cast_fp16 = reduce_mean(axes = var_2585_axes_0, keep_dims = var_2585_keep_dims_0, x = var_2583_cast_fp16)[name = string("op_2585_cast_fp16")];
fp16 var_2586_to_fp16 = const()[name = string("op_2586_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2587_cast_fp16 = add(x = var_2585_cast_fp16, y = var_2586_to_fp16)[name = string("op_2587_cast_fp16")];
fp32 norm_103_epsilon_0 = const()[name = string("norm_103_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_103_cast_fp16 = rsqrt(epsilon = norm_103_epsilon_0, x = var_2587_cast_fp16)[name = string("norm_103_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2589_cast_fp16 = mul(x = x_331_cast_fp16, y = norm_103_cast_fp16)[name = string("op_2589_cast_fp16")];
tensor<fp16, [1024]> layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195165312)))];
tensor<fp16, [1, 128, 1024]> var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = layers_12_post_attention_layernorm_weight_to_fp16)[name = string("op_2590_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195167424))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313216))))[name = string("layers_12_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = var_2590_cast_fp16)[name = string("linear_88_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_2600_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2600_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313792))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201459584))))[name = string("layers_12_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_up_proj_weight_to_fp16_palettized, x = var_2590_cast_fp16)[name = string("linear_89_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_129_cast_fp16 = mul(x = var_2600_cast_fp16, y = linear_89_cast_fp16)[name = string("input_129_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201460160))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204605952))))[name = string("layers_12_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("linear_90_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_337_cast_fp16 = add(x = x_331_cast_fp16, y = linear_90_cast_fp16)[name = string("x_337_cast_fp16")];
int32 var_2621 = const()[name = string("op_2621"), val = int32(-1)];
fp16 var_2620_promoted_to_fp16 = const()[name = string("op_2620_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2630_cast_fp16 = pow(x = x_337_cast_fp16, y = var_2620_promoted_to_fp16)[name = string("op_2630_cast_fp16")];
tensor<int32, [1]> var_2632_axes_0 = const()[name = string("op_2632_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2632_keep_dims_0 = const()[name = string("op_2632_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2632_cast_fp16 = reduce_mean(axes = var_2632_axes_0, keep_dims = var_2632_keep_dims_0, x = var_2630_cast_fp16)[name = string("op_2632_cast_fp16")];
fp16 var_2633_to_fp16 = const()[name = string("op_2633_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2634_cast_fp16 = add(x = var_2632_cast_fp16, y = var_2633_to_fp16)[name = string("op_2634_cast_fp16")];
fp32 norm_105_epsilon_0 = const()[name = string("norm_105_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_105_cast_fp16 = rsqrt(epsilon = norm_105_epsilon_0, x = var_2634_cast_fp16)[name = string("norm_105_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2636_cast_fp16 = mul(x = x_337_cast_fp16, y = norm_105_cast_fp16)[name = string("op_2636_cast_fp16")];
tensor<fp16, [1024]> layers_13_input_layernorm_weight_to_fp16 = const()[name = string("layers_13_input_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204606528)))];
tensor<fp16, [1, 128, 1024]> var_2637_cast_fp16 = mul(x = var_2636_cast_fp16, y = layers_13_input_layernorm_weight_to_fp16)[name = string("op_2637_cast_fp16")];
tensor<fp16, [2048, 1024]> layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204608640))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206705856))))[name = string("layers_13_self_attn_q_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 2048]> linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = var_2637_cast_fp16)[name = string("linear_91_cast_fp16")];
tensor<int32, [4]> var_2653 = const()[name = string("op_2653"), val = tensor<int32, [4]>([1, 128, 16, 128])];
tensor<fp16, [1, 128, 16, 128]> var_2654_cast_fp16 = reshape(shape = var_2653, x = linear_91_cast_fp16)[name = string("op_2654_cast_fp16")];
tensor<int32, [4]> x_343_perm_0 = const()[name = string("x_343_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206706432))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755072))))[name = string("layers_13_self_attn_k_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = var_2637_cast_fp16)[name = string("linear_92_cast_fp16")];
tensor<int32, [4]> var_2658 = const()[name = string("op_2658"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2659_cast_fp16 = reshape(shape = var_2658, x = linear_92_cast_fp16)[name = string("op_2659_cast_fp16")];
tensor<int32, [4]> x_347_perm_0 = const()[name = string("x_347_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<fp16, [1024, 1024]> layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755648))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804288))))[name = string("layers_13_self_attn_v_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = var_2637_cast_fp16)[name = string("linear_93_cast_fp16")];
tensor<int32, [4]> var_2663 = const()[name = string("op_2663"), val = tensor<int32, [4]>([1, 128, 8, 128])];
tensor<fp16, [1, 128, 8, 128]> var_2664_cast_fp16 = reshape(shape = var_2663, x = linear_93_cast_fp16)[name = string("op_2664_cast_fp16")];
tensor<int32, [4]> transpose_69_perm_0 = const()[name = string("transpose_69_perm_0"), val = tensor<int32, [4]>([1, 0, 2, 3])];
fp16 var_2620_promoted_1_to_fp16 = const()[name = string("op_2620_promoted_1_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 16, 128, 128]> x_343_cast_fp16 = transpose(perm = x_343_perm_0, x = var_2654_cast_fp16)[name = string("transpose_6")];
tensor<fp16, [1, 16, 128, 128]> var_2668_cast_fp16 = pow(x = x_343_cast_fp16, y = var_2620_promoted_1_to_fp16)[name = string("op_2668_cast_fp16")];
tensor<int32, [1]> var_2670_axes_0 = const()[name = string("op_2670_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2670_keep_dims_0 = const()[name = string("op_2670_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1]> var_2670_cast_fp16 = reduce_mean(axes = var_2670_axes_0, keep_dims = var_2670_keep_dims_0, x = var_2668_cast_fp16)[name = string("op_2670_cast_fp16")];
fp16 var_2671_to_fp16 = const()[name = string("op_2671_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 16, 128, 1]> var_2672_cast_fp16 = add(x = var_2670_cast_fp16, y = var_2671_to_fp16)[name = string("op_2672_cast_fp16")];
fp32 norm_107_epsilon_0 = const()[name = string("norm_107_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 16, 128, 1]> norm_107_cast_fp16 = rsqrt(epsilon = norm_107_epsilon_0, x = var_2672_cast_fp16)[name = string("norm_107_cast_fp16")];
tensor<fp16, [1, 16, 128, 128]> var_2674_cast_fp16 = mul(x = x_343_cast_fp16, y = norm_107_cast_fp16)[name = string("op_2674_cast_fp16")];
tensor<fp16, [128]> layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804864)))];
tensor<fp16, [1, 16, 128, 128]> var_2675_cast_fp16 = mul(x = var_2674_cast_fp16, y = layers_13_self_attn_q_norm_weight_to_fp16)[name = string("op_2675_cast_fp16")];
fp16 var_2620_promoted_2_to_fp16 = const()[name = string("op_2620_promoted_2_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 8, 128, 128]> x_347_cast_fp16 = transpose(perm = x_347_perm_0, x = var_2659_cast_fp16)[name = string("transpose_5")];
tensor<fp16, [1, 8, 128, 128]> var_2679_cast_fp16 = pow(x = x_347_cast_fp16, y = var_2620_promoted_2_to_fp16)[name = string("op_2679_cast_fp16")];
tensor<int32, [1]> var_2681_axes_0 = const()[name = string("op_2681_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2681_keep_dims_0 = const()[name = string("op_2681_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 8, 128, 1]> var_2681_cast_fp16 = reduce_mean(axes = var_2681_axes_0, keep_dims = var_2681_keep_dims_0, x = var_2679_cast_fp16)[name = string("op_2681_cast_fp16")];
fp16 var_2682_to_fp16 = const()[name = string("op_2682_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 8, 128, 1]> var_2683_cast_fp16 = add(x = var_2681_cast_fp16, y = var_2682_to_fp16)[name = string("op_2683_cast_fp16")];
fp32 norm_109_epsilon_0 = const()[name = string("norm_109_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 8, 128, 1]> norm_109_cast_fp16 = rsqrt(epsilon = norm_109_epsilon_0, x = var_2683_cast_fp16)[name = string("norm_109_cast_fp16")];
tensor<fp16, [1, 8, 128, 128]> var_2685_cast_fp16 = mul(x = x_347_cast_fp16, y = norm_109_cast_fp16)[name = string("op_2685_cast_fp16")];
tensor<fp16, [128]> layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805184)))];
tensor<fp16, [1, 8, 128, 128]> var_2686_cast_fp16 = mul(x = var_2685_cast_fp16, y = layers_13_self_attn_k_norm_weight_to_fp16)[name = string("op_2686_cast_fp16")];
tensor<int32, [4]> x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor<int32, [4]>([1, 16, 128, 64])];
tensor<bool, [4]> x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 16, 128, 64]> x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = var_2675_cast_fp16)[name = string("x1_53_cast_fp16")];
tensor<int32, [4]> x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor<int32, [4]>([1, 16, 128, 128])];
tensor<bool, [4]> x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 16, 128, 64]> x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = var_2675_cast_fp16)[name = string("x2_53_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2707_cast_fp16 = mul(x = x1_53_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2707_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2708_cast_fp16 = mul(x = x2_53_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2708_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2709_cast_fp16 = sub(x = var_2707_cast_fp16, y = var_2708_cast_fp16)[name = string("op_2709_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2710_cast_fp16 = mul(x = x2_53_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2710_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2711_cast_fp16 = mul(x = x1_53_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2711_cast_fp16")];
tensor<fp16, [1, 16, 128, 64]> var_2712_cast_fp16 = add(x = var_2710_cast_fp16, y = var_2711_cast_fp16)[name = string("op_2712_cast_fp16")];
bool q_interleave_0 = const()[name = string("q_interleave_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> q_cast_fp16 = concat(axis = var_2621, interleave = q_interleave_0, values = (var_2709_cast_fp16, var_2712_cast_fp16))[name = string("q_cast_fp16")];
tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 128, 64]> x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_2686_cast_fp16)[name = string("x1_cast_fp16")];
tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 128, 128])];
tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 128, 64]> x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_2686_cast_fp16)[name = string("x2_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2734_cast_fp16 = mul(x = x1_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2734_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2735_cast_fp16 = mul(x = x2_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2735_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2736_cast_fp16 = sub(x = var_2734_cast_fp16, y = var_2735_cast_fp16)[name = string("op_2736_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2737_cast_fp16 = mul(x = x2_cast_fp16, y = cos_val_1_cast_fp16)[name = string("op_2737_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2738_cast_fp16 = mul(x = x1_cast_fp16, y = sin_val_1_cast_fp16)[name = string("op_2738_cast_fp16")];
tensor<fp16, [1, 8, 128, 64]> var_2739_cast_fp16 = add(x = var_2737_cast_fp16, y = var_2738_cast_fp16)[name = string("op_2739_cast_fp16")];
bool var_2741_interleave_0 = const()[name = string("op_2741_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 128, 128]> var_2741_cast_fp16 = concat(axis = var_2621, interleave = var_2741_interleave_0, values = (var_2736_cast_fp16, var_2739_cast_fp16))[name = string("op_2741_cast_fp16")];
tensor<int32, [4]> transpose_53_perm_0 = const()[name = string("transpose_53_perm_0"), val = tensor<int32, [4]>([2, 0, 1, 3])];
tensor<int32, [2]> concat_238 = const()[name = string("concat_238"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_53_cast_fp16 = transpose(perm = transpose_53_perm_0, x = var_2741_cast_fp16)[name = string("transpose_4")];
tensor<fp16, [128, 1024]> reshape_79_cast_fp16 = reshape(shape = concat_238, x = transpose_53_cast_fp16)[name = string("reshape_79_cast_fp16")];
bool matmul_26_transpose_x_1 = const()[name = string("matmul_26_transpose_x_1"), val = bool(true)];
bool matmul_26_transpose_y_1 = const()[name = string("matmul_26_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_26_cast_fp16 = matmul(transpose_x = matmul_26_transpose_x_1, transpose_y = matmul_26_transpose_y_1, x = var_68_to_fp16, y = reshape_79_cast_fp16)[name = string("matmul_26_cast_fp16")];
tensor<int32, [4]> concat_241 = const()[name = string("concat_241"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_80_cast_fp16 = reshape(shape = concat_241, x = matmul_26_cast_fp16)[name = string("reshape_80_cast_fp16")];
tensor<int32, [4]> scattered_k_perm_0 = const()[name = string("scattered_k_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<int32, [2]> concat_246 = const()[name = string("concat_246"), val = tensor<int32, [2]>([128, 1024])];
tensor<fp16, [128, 1, 8, 128]> transpose_69_cast_fp16 = transpose(perm = transpose_69_perm_0, x = var_2664_cast_fp16)[name = string("transpose_3")];
tensor<fp16, [128, 1024]> reshape_82_cast_fp16 = reshape(shape = concat_246, x = transpose_69_cast_fp16)[name = string("reshape_82_cast_fp16")];
bool matmul_27_transpose_x_1 = const()[name = string("matmul_27_transpose_x_1"), val = bool(true)];
bool matmul_27_transpose_y_1 = const()[name = string("matmul_27_transpose_y_1"), val = bool(false)];
tensor<fp16, [1024, 1024]> matmul_27_cast_fp16 = matmul(transpose_x = matmul_27_transpose_x_1, transpose_y = matmul_27_transpose_y_1, x = var_68_to_fp16, y = reshape_82_cast_fp16)[name = string("matmul_27_cast_fp16")];
tensor<int32, [4]> concat_249 = const()[name = string("concat_249"), val = tensor<int32, [4]>([1024, 1, 8, 128])];
tensor<fp16, [1024, 1, 8, 128]> reshape_83_cast_fp16 = reshape(shape = concat_249, x = matmul_27_cast_fp16)[name = string("reshape_83_cast_fp16")];
tensor<int32, [4]> scattered_v_perm_0 = const()[name = string("scattered_v_perm_0"), val = tensor<int32, [4]>([1, 2, 0, 3])];
tensor<fp16, [1, 8, 1024, 128]> read_state_26 = read_state(input = k_cache_13)[name = string("read_state_26")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_81_cast_fp16 = mul(x = read_state_26, y = var_224_cast_fp16)[name = string("k_cache_81_cast_fp16")];
write_state(data = k_cache_81_cast_fp16, input = k_cache_13)[name = string("coreml_update_state_108_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_108 = read_state(input = k_cache_13)[name = string("coreml_update_state_108")];
tensor<fp16, [1, 8, 1024, 128]> scattered_k_cast_fp16 = transpose(perm = scattered_k_perm_0, x = reshape_80_cast_fp16)[name = string("transpose_2")];
tensor<fp16, [1, 8, 1024, 128]> k_cache_cast_fp16 = add(x = coreml_update_state_108, y = scattered_k_cast_fp16)[name = string("k_cache_cast_fp16")];
write_state(data = k_cache_cast_fp16, input = k_cache_13)[name = string("coreml_update_state_109_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_109 = read_state(input = k_cache_13)[name = string("coreml_update_state_109")];
tensor<fp16, [1, 8, 1024, 128]> read_state_27 = read_state(input = v_cache_13)[name = string("read_state_27")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_81_cast_fp16 = mul(x = read_state_27, y = var_224_cast_fp16)[name = string("v_cache_81_cast_fp16")];
write_state(data = v_cache_81_cast_fp16, input = v_cache_13)[name = string("coreml_update_state_110_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_110 = read_state(input = v_cache_13)[name = string("coreml_update_state_110")];
tensor<fp16, [1, 8, 1024, 128]> scattered_v_cast_fp16 = transpose(perm = scattered_v_perm_0, x = reshape_83_cast_fp16)[name = string("transpose_1")];
tensor<fp16, [1, 8, 1024, 128]> v_cache_cast_fp16 = add(x = coreml_update_state_110, y = scattered_v_cast_fp16)[name = string("v_cache_cast_fp16")];
write_state(data = v_cache_cast_fp16, input = v_cache_13)[name = string("coreml_update_state_111_write_state")];
tensor<fp16, [1, 8, 1024, 128]> coreml_update_state_111 = read_state(input = v_cache_13)[name = string("coreml_update_state_111")];
tensor<int32, [1]> var_2752_axes_0 = const()[name = string("op_2752_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2752_cast_fp16 = expand_dims(axes = var_2752_axes_0, x = coreml_update_state_109)[name = string("op_2752_cast_fp16")];
tensor<int32, [5]> k_exp_53_reps_0 = const()[name = string("k_exp_53_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> k_exp_53_cast_fp16 = tile(reps = k_exp_53_reps_0, x = var_2752_cast_fp16)[name = string("k_exp_53_cast_fp16")];
tensor<int32, [4]> var_2755 = const()[name = string("op_2755"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> k_exp_cast_fp16 = reshape(shape = var_2755, x = k_exp_53_cast_fp16)[name = string("k_exp_cast_fp16")];
tensor<int32, [1]> var_2757_axes_0 = const()[name = string("op_2757_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 8, 1, 1024, 128]> var_2757_cast_fp16 = expand_dims(axes = var_2757_axes_0, x = coreml_update_state_111)[name = string("op_2757_cast_fp16")];
tensor<int32, [5]> v_exp_53_reps_0 = const()[name = string("v_exp_53_reps_0"), val = tensor<int32, [5]>([1, 1, 2, 1, 1])];
tensor<fp16, [1, 8, 2, 1024, 128]> v_exp_53_cast_fp16 = tile(reps = v_exp_53_reps_0, x = var_2757_cast_fp16)[name = string("v_exp_53_cast_fp16")];
tensor<int32, [4]> var_2760 = const()[name = string("op_2760"), val = tensor<int32, [4]>([1, 16, 1024, 128])];
tensor<fp16, [1, 16, 1024, 128]> v_exp_cast_fp16 = reshape(shape = var_2760, x = v_exp_53_cast_fp16)[name = string("v_exp_cast_fp16")];
bool var_2763_transpose_x_1 = const()[name = string("op_2763_transpose_x_1"), val = bool(false)];
bool var_2763_transpose_y_1 = const()[name = string("op_2763_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 16, 128, 1024]> var_2763_cast_fp16 = matmul(transpose_x = var_2763_transpose_x_1, transpose_y = var_2763_transpose_y_1, x = q_cast_fp16, y = k_exp_cast_fp16)[name = string("op_2763_cast_fp16")];
fp16 var_2764_to_fp16 = const()[name = string("op_2764_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 16, 128, 1024]> attn_53_cast_fp16 = mul(x = var_2763_cast_fp16, y = var_2764_to_fp16)[name = string("attn_53_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> input_131_cast_fp16 = add(x = attn_53_cast_fp16, y = attention_mask_to_fp16)[name = string("input_131_cast_fp16")];
tensor<fp16, [1, 16, 128, 1024]> attn_cast_fp16 = softmax(axis = var_2621, x = input_131_cast_fp16)[name = string("attn_cast_fp16")];
bool out_transpose_x_0 = const()[name = string("out_transpose_x_0"), val = bool(false)];
bool out_transpose_y_0 = const()[name = string("out_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 16, 128, 128]> out_cast_fp16 = matmul(transpose_x = out_transpose_x_0, transpose_y = out_transpose_y_0, x = attn_cast_fp16, y = v_exp_cast_fp16)[name = string("out_cast_fp16")];
tensor<int32, [4]> var_2769_perm_0 = const()[name = string("op_2769_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_2770 = const()[name = string("op_2770"), val = tensor<int32, [3]>([1, 128, -1])];
tensor<fp16, [1, 128, 16, 128]> var_2769_cast_fp16 = transpose(perm = var_2769_perm_0, x = out_cast_fp16)[name = string("transpose_0")];
tensor<fp16, [1, 128, 2048]> input_133_cast_fp16 = reshape(shape = var_2770, x = var_2769_cast_fp16)[name = string("input_133_cast_fp16")];
tensor<fp16, [1024, 2048]> layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805504))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210902720))))[name = string("layers_13_self_attn_o_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = string("linear_94_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_357_cast_fp16 = add(x = x_337_cast_fp16, y = linear_94_cast_fp16)[name = string("x_357_cast_fp16")];
fp16 var_2620_promoted_3_to_fp16 = const()[name = string("op_2620_promoted_3_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 128, 1024]> var_2777_cast_fp16 = pow(x = x_357_cast_fp16, y = var_2620_promoted_3_to_fp16)[name = string("op_2777_cast_fp16")];
tensor<int32, [1]> var_2779_axes_0 = const()[name = string("op_2779_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2779_keep_dims_0 = const()[name = string("op_2779_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 128, 1]> var_2779_cast_fp16 = reduce_mean(axes = var_2779_axes_0, keep_dims = var_2779_keep_dims_0, x = var_2777_cast_fp16)[name = string("op_2779_cast_fp16")];
fp16 var_2780_to_fp16 = const()[name = string("op_2780_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 128, 1]> var_2781_cast_fp16 = add(x = var_2779_cast_fp16, y = var_2780_to_fp16)[name = string("op_2781_cast_fp16")];
fp32 norm_111_epsilon_0 = const()[name = string("norm_111_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 128, 1]> norm_111_cast_fp16 = rsqrt(epsilon = norm_111_epsilon_0, x = var_2781_cast_fp16)[name = string("norm_111_cast_fp16")];
tensor<fp16, [1, 128, 1024]> var_2783_cast_fp16 = mul(x = x_357_cast_fp16, y = norm_111_cast_fp16)[name = string("op_2783_cast_fp16")];
tensor<fp16, [1024]> layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210903296)))];
tensor<fp16, [1, 128, 1024]> var_2784_cast_fp16 = mul(x = var_2783_cast_fp16, y = layers_13_post_attention_layernorm_weight_to_fp16)[name = string("op_2784_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210905408))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051200))))[name = string("layers_13_mlp_gate_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = var_2784_cast_fp16)[name = string("linear_95_cast_fp16")];
tensor<fp16, [1, 128, 3072]> var_2794_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2794_cast_fp16")];
tensor<fp16, [3072, 1024]> layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051776))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217197568))))[name = string("layers_13_mlp_up_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 3072]> linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_up_proj_weight_to_fp16_palettized, x = var_2784_cast_fp16)[name = string("linear_96_cast_fp16")];
tensor<fp16, [1, 128, 3072]> input_139_cast_fp16 = mul(x = var_2794_cast_fp16, y = linear_96_cast_fp16)[name = string("input_139_cast_fp16")];
tensor<fp16, [1024, 3072]> layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217198144))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220343936))))[name = string("layers_13_mlp_down_proj_weight_to_fp16_palettized")];
tensor<fp16, [1, 128, 1024]> linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("linear_97_cast_fp16")];
tensor<fp16, [1, 128, 1024]> x_363_cast_fp16 = add(x = x_357_cast_fp16, y = linear_97_cast_fp16)[name = string("x_363_cast_fp16")];
tensor<int32, [3]> var_2810_begin_0 = const()[name = string("op_2810_begin_0"), val = tensor<int32, [3]>([0, -1, 0])];
tensor<int32, [3]> var_2810_end_0 = const()[name = string("op_2810_end_0"), val = tensor<int32, [3]>([1, 128, 1024])];
tensor<bool, [3]> var_2810_end_mask_0 = const()[name = string("op_2810_end_mask_0"), val = tensor<bool, [3]>([true, true, true])];
tensor<fp16, [1, 1, 1024]> var_2810_cast_fp16 = slice_by_index(begin = var_2810_begin_0, end = var_2810_end_0, end_mask = var_2810_end_mask_0, x = x_363_cast_fp16)[name = string("op_2810_cast_fp16")];
fp16 var_2819_promoted_to_fp16 = const()[name = string("op_2819_promoted_to_fp16"), val = fp16(0x1p+1)];
tensor<fp16, [1, 1, 1024]> var_2825_cast_fp16 = pow(x = var_2810_cast_fp16, y = var_2819_promoted_to_fp16)[name = string("op_2825_cast_fp16")];
tensor<int32, [1]> var_2827_axes_0 = const()[name = string("op_2827_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_2827_keep_dims_0 = const()[name = string("op_2827_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> var_2827_cast_fp16 = reduce_mean(axes = var_2827_axes_0, keep_dims = var_2827_keep_dims_0, x = var_2825_cast_fp16)[name = string("op_2827_cast_fp16")];
fp16 var_2828_to_fp16 = const()[name = string("op_2828_to_fp16"), val = fp16(0x1.1p-20)];
tensor<fp16, [1, 1, 1]> var_2829_cast_fp16 = add(x = var_2827_cast_fp16, y = var_2828_to_fp16)[name = string("op_2829_cast_fp16")];
fp32 norm_113_epsilon_0 = const()[name = string("norm_113_epsilon_0"), val = fp32(0x1.197998p-40)];
tensor<fp16, [1, 1, 1]> norm_113_cast_fp16 = rsqrt(epsilon = norm_113_epsilon_0, x = var_2829_cast_fp16)[name = string("norm_113_cast_fp16")];
tensor<fp16, [1, 1, 1024]> var_2831_cast_fp16 = mul(x = var_2810_cast_fp16, y = norm_113_cast_fp16)[name = string("op_2831_cast_fp16")];
tensor<fp16, [1024]> norm_weight_to_fp16 = const()[name = string("norm_weight_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220344512)))];
tensor<fp16, [1, 1, 1024]> var_2832_cast_fp16 = mul(x = var_2831_cast_fp16, y = norm_weight_to_fp16)[name = string("op_2832_cast_fp16")];
tensor<fp16, [151936, 1024]> lm_head_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [151936, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220346624))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375929152))))[name = string("lm_head_weight_to_fp16_palettized")];
tensor<fp16, [151936]> linear_98_bias_0_to_fp16 = const()[name = string("linear_98_bias_0_to_fp16"), val = tensor<fp16, [151936]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375929728)))];
tensor<fp16, [1, 1, 151936]> logits = linear(bias = linear_98_bias_0_to_fp16, weight = lm_head_weight_to_fp16_palettized, x = var_2832_cast_fp16)[name = string("linear_98_cast_fp16")];
} -> (logits);
}