program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})] { func main(tensor cache_length, tensor input_embeds, tensor key_cache, tensor key_padding_mask, tensor kv_cache_update_mask, tensor value_cache) { tensor var_935_axes_0 = const()[name = string("op_935_axes_0"), val = tensor([0])]; tensor var_935 = expand_dims(axes = var_935_axes_0, x = cache_length)[name = string("op_935")]; tensor var_956_axes_0 = const()[name = string("op_956_axes_0"), val = tensor([-1])]; string pos_to_fp16_dtype_0 = const()[name = string("pos_to_fp16_dtype_0"), val = string("fp16")]; tensor var_935_to_fp16 = cast(dtype = pos_to_fp16_dtype_0, x = var_935)[name = string("cast_0")]; tensor var_956_cast_fp16 = expand_dims(axes = var_956_axes_0, x = var_935_to_fp16)[name = string("op_956_cast_fp16")]; bool var_957_transpose_x_0 = const()[name = string("op_957_transpose_x_0"), val = bool(false)]; bool var_957_transpose_y_0 = const()[name = string("op_957_transpose_y_0"), val = bool(false)]; tensor const_0_to_fp16 = const()[name = string("const_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor var_957_cast_fp16 = matmul(transpose_x = var_957_transpose_x_0, transpose_y = var_957_transpose_y_0, x = const_0_to_fp16, y = var_956_cast_fp16)[name = string("op_957_cast_fp16")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; int32 var_962 = const()[name = string("op_962"), val = int32(-1)]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_957_cast_fp16)[name = string("transpose_56")]; tensor emb_cast_fp16 = concat(axis = var_962, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; tensor var_964_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_964_cast_fp16")]; tensor var_972_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_972_cast_fp16")]; tensor var_981_axes_0 = const()[name = string("op_981_axes_0"), val = tensor([1])]; tensor var_981_cast_fp16 = expand_dims(axes = var_981_axes_0, x = kv_cache_update_mask)[name = string("op_981_cast_fp16")]; tensor update_mask_axes_0 = const()[name = string("update_mask_axes_0"), val = tensor([2])]; tensor update_mask_cast_fp16 = expand_dims(axes = update_mask_axes_0, x = var_981_cast_fp16)[name = string("update_mask_cast_fp16")]; tensor var_993_begin_0 = const()[name = string("op_993_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_993_end_0 = const()[name = string("op_993_end_0"), val = tensor([1, 1024, 1, 256])]; tensor var_993_end_mask_0 = const()[name = string("op_993_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_993_cast_fp16 = slice_by_index(begin = var_993_begin_0, end = var_993_end_0, end_mask = var_993_end_mask_0, x = key_cache)[name = string("op_993_cast_fp16")]; tensor var_1013_begin_0 = const()[name = string("op_1013_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1013_end_0 = const()[name = string("op_1013_end_0"), val = tensor([1, 1024, 1, 256])]; tensor var_1013_end_mask_0 = const()[name = string("op_1013_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1013_cast_fp16 = slice_by_index(begin = var_1013_begin_0, end = var_1013_end_0, end_mask = var_1013_end_mask_0, x = value_cache)[name = string("op_1013_cast_fp16")]; tensor var_1025_axes_0 = const()[name = string("op_1025_axes_0"), val = tensor([-1])]; tensor var_1025_cast_fp16 = squeeze(axes = var_1025_axes_0, x = input_embeds)[name = string("op_1025_cast_fp16")]; tensor var_1027_axes_0 = const()[name = string("op_1027_axes_0"), val = tensor([-1])]; tensor var_1027_cast_fp16 = squeeze(axes = var_1027_axes_0, x = var_1025_cast_fp16)[name = string("op_1027_cast_fp16")]; tensor hidden_states_1_axes_0 = const()[name = string("hidden_states_1_axes_0"), val = tensor([0])]; tensor hidden_states_1_cast_fp16 = expand_dims(axes = hidden_states_1_axes_0, x = var_1027_cast_fp16)[name = string("hidden_states_1_cast_fp16")]; fp16 var_1033_promoted_to_fp16 = const()[name = string("op_1033_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1039_cast_fp16 = pow(x = hidden_states_1_cast_fp16, y = var_1033_promoted_to_fp16)[name = string("op_1039_cast_fp16")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_1039_cast_fp16)[name = string("variance_1_cast_fp16")]; tensor const_1_to_fp16 = const()[name = string("const_1_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256)))]; tensor var_1043_cast_fp16 = mul(x = const_1_to_fp16, y = hidden_states_1_cast_fp16)[name = string("op_1043_cast_fp16")]; fp16 var_1044_to_fp16 = const()[name = string("op_1044_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1045_cast_fp16 = add(x = variance_1_cast_fp16, y = var_1044_to_fp16)[name = string("op_1045_cast_fp16")]; fp32 var_1046_epsilon_0 = const()[name = string("op_1046_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1046_cast_fp16 = rsqrt(epsilon = var_1046_epsilon_0, x = var_1045_cast_fp16)[name = string("op_1046_cast_fp16")]; tensor input_1_cast_fp16 = mul(x = var_1043_cast_fp16, y = var_1046_cast_fp16)[name = string("input_1_cast_fp16")]; tensor layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2099584))))[name = string("layers_0_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2100160)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_1055 = const()[name = string("op_1055"), val = tensor([1, 1, 16, 128])]; tensor var_1056_cast_fp16 = reshape(shape = var_1055, x = linear_0_cast_fp16)[name = string("op_1056_cast_fp16")]; tensor layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2104320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3152960))))[name = string("layers_0_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3153536)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor var_1067 = const()[name = string("op_1067"), val = tensor([1, 1, 8, 128])]; tensor var_1068_cast_fp16 = reshape(shape = var_1067, x = linear_1_cast_fp16)[name = string("op_1068_cast_fp16")]; tensor layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3155648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204288))))[name = string("layers_0_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_2_cast_fp16")]; fp16 var_1087_promoted_to_fp16 = const()[name = string("op_1087_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1093_cast_fp16 = pow(x = var_1056_cast_fp16, y = var_1087_promoted_to_fp16)[name = string("op_1093_cast_fp16")]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor const_282 = const()[name = string("const_282"), val = tensor([3])]; tensor variance_3_cast_fp16 = reduce_mean(axes = const_282, keep_dims = variance_3_keep_dims_0, x = var_1093_cast_fp16)[name = string("variance_3_cast_fp16")]; tensor const_283_to_fp16 = const()[name = string("const_283_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4204864)))]; tensor var_1097_cast_fp16 = mul(x = const_283_to_fp16, y = var_1056_cast_fp16)[name = string("op_1097_cast_fp16")]; fp16 var_1098_to_fp16 = const()[name = string("op_1098_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1099_cast_fp16 = add(x = variance_3_cast_fp16, y = var_1098_to_fp16)[name = string("op_1099_cast_fp16")]; fp32 var_1100_epsilon_0 = const()[name = string("op_1100_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1100_cast_fp16 = rsqrt(epsilon = var_1100_epsilon_0, x = var_1099_cast_fp16)[name = string("op_1100_cast_fp16")]; tensor q_1_cast_fp16 = mul(x = var_1097_cast_fp16, y = var_1100_cast_fp16)[name = string("q_1_cast_fp16")]; fp16 var_1105_promoted_to_fp16 = const()[name = string("op_1105_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1111_cast_fp16 = pow(x = var_1068_cast_fp16, y = var_1105_promoted_to_fp16)[name = string("op_1111_cast_fp16")]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor const_284 = const()[name = string("const_284"), val = tensor([3])]; tensor variance_5_cast_fp16 = reduce_mean(axes = const_284, keep_dims = variance_5_keep_dims_0, x = var_1111_cast_fp16)[name = string("variance_5_cast_fp16")]; tensor const_285_to_fp16 = const()[name = string("const_285_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205184)))]; tensor var_1115_cast_fp16 = mul(x = const_285_to_fp16, y = var_1068_cast_fp16)[name = string("op_1115_cast_fp16")]; fp16 var_1116_to_fp16 = const()[name = string("op_1116_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1117_cast_fp16 = add(x = variance_5_cast_fp16, y = var_1116_to_fp16)[name = string("op_1117_cast_fp16")]; fp32 var_1118_epsilon_0 = const()[name = string("op_1118_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1118_cast_fp16 = rsqrt(epsilon = var_1118_epsilon_0, x = var_1117_cast_fp16)[name = string("op_1118_cast_fp16")]; tensor k_1_cast_fp16 = mul(x = var_1115_cast_fp16, y = var_1118_cast_fp16)[name = string("k_1_cast_fp16")]; tensor cos_1_axes_0 = const()[name = string("cos_1_axes_0"), val = tensor([1])]; tensor cos_1_cast_fp16 = expand_dims(axes = cos_1_axes_0, x = var_964_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor sin_1_axes_0 = const()[name = string("sin_1_axes_0"), val = tensor([1])]; tensor sin_1_cast_fp16 = expand_dims(axes = sin_1_axes_0, x = var_972_cast_fp16)[name = string("sin_1_cast_fp16")]; tensor var_1133_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1133_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1154_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_1154_cast_fp16")]; int32 var_1156 = const()[name = string("op_1156"), val = int32(-1)]; bool var_1157_interleave_0 = const()[name = string("op_1157_interleave_0"), val = bool(false)]; tensor var_1157_cast_fp16 = concat(axis = var_1156, interleave = var_1157_interleave_0, values = (var_1154_cast_fp16, x1_1_cast_fp16))[name = string("op_1157_cast_fp16")]; tensor var_1158_cast_fp16 = mul(x = var_1157_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1158_cast_fp16")]; tensor q_5_cast_fp16 = add(x = var_1133_cast_fp16, y = var_1158_cast_fp16)[name = string("q_5_cast_fp16")]; tensor var_1161_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1161_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1182_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_1182_cast_fp16")]; int32 var_1184 = const()[name = string("op_1184"), val = int32(-1)]; bool var_1185_interleave_0 = const()[name = string("op_1185_interleave_0"), val = bool(false)]; tensor var_1185_cast_fp16 = concat(axis = var_1184, interleave = var_1185_interleave_0, values = (var_1182_cast_fp16, x1_3_cast_fp16))[name = string("op_1185_cast_fp16")]; tensor var_1186_cast_fp16 = mul(x = var_1185_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1186_cast_fp16")]; tensor k_5_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1186_cast_fp16)[name = string("k_5_cast_fp16")]; tensor var_1193 = const()[name = string("op_1193"), val = tensor([1, 1024, 1, 1])]; tensor nk_1_cast_fp16 = reshape(shape = var_1193, x = k_5_cast_fp16)[name = string("nk_1_cast_fp16")]; tensor var_1199 = const()[name = string("op_1199"), val = tensor([1, 1024, 1, 1])]; tensor nv_1_cast_fp16 = reshape(shape = var_1199, x = linear_2_cast_fp16)[name = string("nv_1_cast_fp16")]; fp16 var_1201_to_fp16 = const()[name = string("op_1201_to_fp16"), val = fp16(0x1p+0)]; tensor var_1203_cast_fp16 = sub(x = var_1201_to_fp16, y = update_mask_cast_fp16)[name = string("op_1203_cast_fp16")]; tensor var_1204_cast_fp16 = mul(x = var_993_cast_fp16, y = var_1203_cast_fp16)[name = string("op_1204_cast_fp16")]; tensor var_1205_cast_fp16 = mul(x = nk_1_cast_fp16, y = update_mask_cast_fp16)[name = string("op_1205_cast_fp16")]; tensor lkc_3_cast_fp16 = add(x = var_1204_cast_fp16, y = var_1205_cast_fp16)[name = string("lkc_3_cast_fp16")]; tensor var_1211_cast_fp16 = mul(x = var_1013_cast_fp16, y = var_1203_cast_fp16)[name = string("op_1211_cast_fp16")]; tensor var_1212_cast_fp16 = mul(x = nv_1_cast_fp16, y = update_mask_cast_fp16)[name = string("op_1212_cast_fp16")]; tensor lvc_3_cast_fp16 = add(x = var_1211_cast_fp16, y = var_1212_cast_fp16)[name = string("lvc_3_cast_fp16")]; tensor var_1216_axes_0 = const()[name = string("op_1216_axes_0"), val = tensor([2])]; tensor var_1216_cast_fp16 = squeeze(axes = var_1216_axes_0, x = lkc_3_cast_fp16)[name = string("op_1216_cast_fp16")]; tensor var_1221 = const()[name = string("op_1221"), val = tensor([1, 8, 128, 256])]; tensor kc_1_cast_fp16 = reshape(shape = var_1221, x = var_1216_cast_fp16)[name = string("kc_1_cast_fp16")]; tensor var_1224_axes_0 = const()[name = string("op_1224_axes_0"), val = tensor([2])]; tensor var_1224_cast_fp16 = squeeze(axes = var_1224_axes_0, x = lvc_3_cast_fp16)[name = string("op_1224_cast_fp16")]; tensor var_1229 = const()[name = string("op_1229"), val = tensor([1, 8, 128, 256])]; tensor vc_1_cast_fp16 = reshape(shape = var_1229, x = var_1224_cast_fp16)[name = string("vc_1_cast_fp16")]; tensor var_1232_axes_0 = const()[name = string("op_1232_axes_0"), val = tensor([2])]; tensor var_1232_cast_fp16 = expand_dims(axes = var_1232_axes_0, x = kc_1_cast_fp16)[name = string("op_1232_cast_fp16")]; tensor var_1240_reps_0 = const()[name = string("op_1240_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1240_cast_fp16 = tile(reps = var_1240_reps_0, x = var_1232_cast_fp16)[name = string("op_1240_cast_fp16")]; tensor var_1245 = const()[name = string("op_1245"), val = tensor([1, 16, 128, 256])]; tensor kc_3_cast_fp16 = reshape(shape = var_1245, x = var_1240_cast_fp16)[name = string("kc_3_cast_fp16")]; tensor var_1248_axes_0 = const()[name = string("op_1248_axes_0"), val = tensor([2])]; tensor var_1248_cast_fp16 = expand_dims(axes = var_1248_axes_0, x = vc_1_cast_fp16)[name = string("op_1248_cast_fp16")]; tensor var_1256_reps_0 = const()[name = string("op_1256_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1256_cast_fp16 = tile(reps = var_1256_reps_0, x = var_1248_cast_fp16)[name = string("op_1256_cast_fp16")]; tensor var_1261 = const()[name = string("op_1261"), val = tensor([1, 16, 128, 256])]; tensor vc_3_cast_fp16 = reshape(shape = var_1261, x = var_1256_cast_fp16)[name = string("vc_3_cast_fp16")]; tensor var_1265_perm_0 = const()[name = string("op_1265_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_1266_transpose_x_0 = const()[name = string("op_1266_transpose_x_0"), val = bool(false)]; bool var_1266_transpose_y_0 = const()[name = string("op_1266_transpose_y_0"), val = bool(false)]; tensor var_1265_cast_fp16 = transpose(perm = var_1265_perm_0, x = q_5_cast_fp16)[name = string("transpose_55")]; tensor var_1266_cast_fp16 = matmul(transpose_x = var_1266_transpose_x_0, transpose_y = var_1266_transpose_y_0, x = var_1265_cast_fp16, y = kc_3_cast_fp16)[name = string("op_1266_cast_fp16")]; fp16 _inversed_aw_1_y_0_to_fp16 = const()[name = string("_inversed_aw_1_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_1_cast_fp16 = mul(x = var_1266_cast_fp16, y = _inversed_aw_1_y_0_to_fp16)[name = string("_inversed_aw_1_cast_fp16")]; tensor var_1270_axes_0 = const()[name = string("op_1270_axes_0"), val = tensor([1])]; tensor var_1270_cast_fp16 = expand_dims(axes = var_1270_axes_0, x = key_padding_mask)[name = string("op_1270_cast_fp16")]; tensor var_1272_axes_0 = const()[name = string("op_1272_axes_0"), val = tensor([2])]; tensor var_1272_cast_fp16 = expand_dims(axes = var_1272_axes_0, x = var_1270_cast_fp16)[name = string("op_1272_cast_fp16")]; tensor aw_3_cast_fp16 = add(x = _inversed_aw_1_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_3_cast_fp16")]; int32 var_1280 = const()[name = string("op_1280"), val = int32(-1)]; tensor aw_7_cast_fp16 = softmax(axis = var_1280, x = aw_3_cast_fp16)[name = string("aw_7_cast_fp16")]; bool var_1286_transpose_x_1 = const()[name = string("op_1286_transpose_x_1"), val = bool(false)]; bool var_1286_transpose_y_1 = const()[name = string("op_1286_transpose_y_1"), val = bool(true)]; tensor var_1286_cast_fp16 = matmul(transpose_x = var_1286_transpose_x_1, transpose_y = var_1286_transpose_y_1, x = aw_7_cast_fp16, y = vc_3_cast_fp16)[name = string("op_1286_cast_fp16")]; tensor var_1289_perm_0 = const()[name = string("op_1289_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1293 = const()[name = string("op_1293"), val = tensor([1, 1, -1])]; tensor var_1289_cast_fp16 = transpose(perm = var_1289_perm_0, x = var_1286_cast_fp16)[name = string("transpose_54")]; tensor input_3_cast_fp16 = reshape(shape = var_1293, x = var_1289_cast_fp16)[name = string("input_3_cast_fp16")]; tensor layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4205504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6302720))))[name = string("layers_0_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor var_1299_axes_0 = const()[name = string("op_1299_axes_0"), val = tensor([0])]; tensor var_1299_cast_fp16 = squeeze(axes = var_1299_axes_0, x = linear_3_cast_fp16)[name = string("op_1299_cast_fp16")]; tensor var_1301_axes_0 = const()[name = string("op_1301_axes_0"), val = tensor([0])]; tensor var_1301_cast_fp16 = squeeze(axes = var_1301_axes_0, x = var_1299_cast_fp16)[name = string("op_1301_cast_fp16")]; tensor var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor([-1])]; tensor var_1303_cast_fp16 = expand_dims(axes = var_1303_axes_0, x = var_1301_cast_fp16)[name = string("op_1303_cast_fp16")]; tensor ao_1_axes_0 = const()[name = string("ao_1_axes_0"), val = tensor([-1])]; tensor ao_1_cast_fp16 = expand_dims(axes = ao_1_axes_0, x = var_1303_cast_fp16)[name = string("ao_1_cast_fp16")]; tensor hidden_1_cast_fp16 = add(x = input_embeds, y = ao_1_cast_fp16)[name = string("hidden_1_cast_fp16")]; tensor var_1309_axes_0 = const()[name = string("op_1309_axes_0"), val = tensor([-1])]; tensor var_1309_cast_fp16 = squeeze(axes = var_1309_axes_0, x = hidden_1_cast_fp16)[name = string("op_1309_cast_fp16")]; tensor var_1311_axes_0 = const()[name = string("op_1311_axes_0"), val = tensor([-1])]; tensor var_1311_cast_fp16 = squeeze(axes = var_1311_axes_0, x = var_1309_cast_fp16)[name = string("op_1311_cast_fp16")]; tensor hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor([0])]; tensor hidden_states_13_cast_fp16 = expand_dims(axes = hidden_states_13_axes_0, x = var_1311_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; fp16 var_1317_promoted_to_fp16 = const()[name = string("op_1317_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1323_cast_fp16 = pow(x = hidden_states_13_cast_fp16, y = var_1317_promoted_to_fp16)[name = string("op_1323_cast_fp16")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_1323_cast_fp16)[name = string("variance_7_cast_fp16")]; tensor const_10_to_fp16 = const()[name = string("const_10_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6303296)))]; tensor var_1327_cast_fp16 = mul(x = const_10_to_fp16, y = hidden_states_13_cast_fp16)[name = string("op_1327_cast_fp16")]; fp16 var_1328_to_fp16 = const()[name = string("op_1328_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1329_cast_fp16 = add(x = variance_7_cast_fp16, y = var_1328_to_fp16)[name = string("op_1329_cast_fp16")]; fp32 var_1330_epsilon_0 = const()[name = string("op_1330_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1330_cast_fp16 = rsqrt(epsilon = var_1330_epsilon_0, x = var_1329_cast_fp16)[name = string("op_1330_cast_fp16")]; tensor input_5_cast_fp16 = mul(x = var_1327_cast_fp16, y = var_1330_cast_fp16)[name = string("input_5_cast_fp16")]; tensor layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6305408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451200))))[name = string("layers_0_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9451776)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_1338_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_1338_cast_fp16")]; tensor layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9457984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12603776))))[name = string("layers_0_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_0_mlp_up_proj_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_9_cast_fp16 = mul(x = var_1338_cast_fp16, y = linear_5_cast_fp16)[name = string("input_9_cast_fp16")]; tensor layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12604352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750144))))[name = string("layers_0_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor var_1345_axes_0 = const()[name = string("op_1345_axes_0"), val = tensor([0])]; tensor var_1345_cast_fp16 = squeeze(axes = var_1345_axes_0, x = linear_6_cast_fp16)[name = string("op_1345_cast_fp16")]; tensor var_1347_axes_0 = const()[name = string("op_1347_axes_0"), val = tensor([0])]; tensor var_1347_cast_fp16 = squeeze(axes = var_1347_axes_0, x = var_1345_cast_fp16)[name = string("op_1347_cast_fp16")]; tensor var_1349_axes_0 = const()[name = string("op_1349_axes_0"), val = tensor([-1])]; tensor var_1349_cast_fp16 = expand_dims(axes = var_1349_axes_0, x = var_1347_cast_fp16)[name = string("op_1349_cast_fp16")]; tensor h_1_axes_0 = const()[name = string("h_1_axes_0"), val = tensor([-1])]; tensor h_1_cast_fp16 = expand_dims(axes = h_1_axes_0, x = var_1349_cast_fp16)[name = string("h_1_cast_fp16")]; tensor hidden_3_cast_fp16 = add(x = hidden_1_cast_fp16, y = h_1_cast_fp16)[name = string("hidden_3_cast_fp16")]; tensor var_1363_begin_0 = const()[name = string("op_1363_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_1363_end_0 = const()[name = string("op_1363_end_0"), val = tensor([1, 2048, 1, 256])]; tensor var_1363_end_mask_0 = const()[name = string("op_1363_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1363_cast_fp16 = slice_by_index(begin = var_1363_begin_0, end = var_1363_end_0, end_mask = var_1363_end_mask_0, x = key_cache)[name = string("op_1363_cast_fp16")]; tensor var_1383_begin_0 = const()[name = string("op_1383_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_1383_end_0 = const()[name = string("op_1383_end_0"), val = tensor([1, 2048, 1, 256])]; tensor var_1383_end_mask_0 = const()[name = string("op_1383_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1383_cast_fp16 = slice_by_index(begin = var_1383_begin_0, end = var_1383_end_0, end_mask = var_1383_end_mask_0, x = value_cache)[name = string("op_1383_cast_fp16")]; tensor var_1395_axes_0 = const()[name = string("op_1395_axes_0"), val = tensor([-1])]; tensor var_1395_cast_fp16 = squeeze(axes = var_1395_axes_0, x = hidden_3_cast_fp16)[name = string("op_1395_cast_fp16")]; tensor var_1397_axes_0 = const()[name = string("op_1397_axes_0"), val = tensor([-1])]; tensor var_1397_cast_fp16 = squeeze(axes = var_1397_axes_0, x = var_1395_cast_fp16)[name = string("op_1397_cast_fp16")]; tensor hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor([0])]; tensor hidden_states_17_cast_fp16 = expand_dims(axes = hidden_states_17_axes_0, x = var_1397_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; fp16 var_1403_promoted_to_fp16 = const()[name = string("op_1403_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1409_cast_fp16 = pow(x = hidden_states_17_cast_fp16, y = var_1403_promoted_to_fp16)[name = string("op_1409_cast_fp16")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_1409_cast_fp16)[name = string("variance_9_cast_fp16")]; tensor const_11_to_fp16 = const()[name = string("const_11_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15750720)))]; tensor var_1413_cast_fp16 = mul(x = const_11_to_fp16, y = hidden_states_17_cast_fp16)[name = string("op_1413_cast_fp16")]; fp16 var_1414_to_fp16 = const()[name = string("op_1414_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1415_cast_fp16 = add(x = variance_9_cast_fp16, y = var_1414_to_fp16)[name = string("op_1415_cast_fp16")]; fp32 var_1416_epsilon_0 = const()[name = string("op_1416_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1416_cast_fp16 = rsqrt(epsilon = var_1416_epsilon_0, x = var_1415_cast_fp16)[name = string("op_1416_cast_fp16")]; tensor input_11_cast_fp16 = mul(x = var_1413_cast_fp16, y = var_1416_cast_fp16)[name = string("input_11_cast_fp16")]; tensor layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15752832))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850048))))[name = string("layers_1_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor var_1425 = const()[name = string("op_1425"), val = tensor([1, 1, 16, 128])]; tensor var_1426_cast_fp16 = reshape(shape = var_1425, x = linear_7_cast_fp16)[name = string("op_1426_cast_fp16")]; tensor layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17850624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899264))))[name = string("layers_1_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_1437 = const()[name = string("op_1437"), val = tensor([1, 1, 8, 128])]; tensor var_1438_cast_fp16 = reshape(shape = var_1437, x = linear_8_cast_fp16)[name = string("op_1438_cast_fp16")]; tensor layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18899840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19948480))))[name = string("layers_1_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("linear_9_cast_fp16")]; fp16 var_1457_promoted_to_fp16 = const()[name = string("op_1457_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1463_cast_fp16 = pow(x = var_1426_cast_fp16, y = var_1457_promoted_to_fp16)[name = string("op_1463_cast_fp16")]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor const_286 = const()[name = string("const_286"), val = tensor([3])]; tensor variance_11_cast_fp16 = reduce_mean(axes = const_286, keep_dims = variance_11_keep_dims_0, x = var_1463_cast_fp16)[name = string("variance_11_cast_fp16")]; tensor const_287_to_fp16 = const()[name = string("const_287_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949056)))]; tensor var_1467_cast_fp16 = mul(x = const_287_to_fp16, y = var_1426_cast_fp16)[name = string("op_1467_cast_fp16")]; fp16 var_1468_to_fp16 = const()[name = string("op_1468_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1469_cast_fp16 = add(x = variance_11_cast_fp16, y = var_1468_to_fp16)[name = string("op_1469_cast_fp16")]; fp32 var_1470_epsilon_0 = const()[name = string("op_1470_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1470_cast_fp16 = rsqrt(epsilon = var_1470_epsilon_0, x = var_1469_cast_fp16)[name = string("op_1470_cast_fp16")]; tensor q_7_cast_fp16 = mul(x = var_1467_cast_fp16, y = var_1470_cast_fp16)[name = string("q_7_cast_fp16")]; fp16 var_1475_promoted_to_fp16 = const()[name = string("op_1475_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1481_cast_fp16 = pow(x = var_1438_cast_fp16, y = var_1475_promoted_to_fp16)[name = string("op_1481_cast_fp16")]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor const_288 = const()[name = string("const_288"), val = tensor([3])]; tensor variance_13_cast_fp16 = reduce_mean(axes = const_288, keep_dims = variance_13_keep_dims_0, x = var_1481_cast_fp16)[name = string("variance_13_cast_fp16")]; tensor const_289_to_fp16 = const()[name = string("const_289_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949376)))]; tensor var_1485_cast_fp16 = mul(x = const_289_to_fp16, y = var_1438_cast_fp16)[name = string("op_1485_cast_fp16")]; fp16 var_1486_to_fp16 = const()[name = string("op_1486_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1487_cast_fp16 = add(x = variance_13_cast_fp16, y = var_1486_to_fp16)[name = string("op_1487_cast_fp16")]; fp32 var_1488_epsilon_0 = const()[name = string("op_1488_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1488_cast_fp16 = rsqrt(epsilon = var_1488_epsilon_0, x = var_1487_cast_fp16)[name = string("op_1488_cast_fp16")]; tensor k_7_cast_fp16 = mul(x = var_1485_cast_fp16, y = var_1488_cast_fp16)[name = string("k_7_cast_fp16")]; tensor var_1503_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1503_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_7_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_7_cast_fp16)[name = string("x2_5_cast_fp16")]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1524_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1524_cast_fp16")]; int32 var_1526 = const()[name = string("op_1526"), val = int32(-1)]; bool var_1527_interleave_0 = const()[name = string("op_1527_interleave_0"), val = bool(false)]; tensor var_1527_cast_fp16 = concat(axis = var_1526, interleave = var_1527_interleave_0, values = (var_1524_cast_fp16, x1_5_cast_fp16))[name = string("op_1527_cast_fp16")]; tensor var_1528_cast_fp16 = mul(x = var_1527_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1528_cast_fp16")]; tensor q_11_cast_fp16 = add(x = var_1503_cast_fp16, y = var_1528_cast_fp16)[name = string("q_11_cast_fp16")]; tensor var_1531_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1531_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_7_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_7_cast_fp16)[name = string("x2_7_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1552_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1552_cast_fp16")]; int32 var_1554 = const()[name = string("op_1554"), val = int32(-1)]; bool var_1555_interleave_0 = const()[name = string("op_1555_interleave_0"), val = bool(false)]; tensor var_1555_cast_fp16 = concat(axis = var_1554, interleave = var_1555_interleave_0, values = (var_1552_cast_fp16, x1_7_cast_fp16))[name = string("op_1555_cast_fp16")]; tensor var_1556_cast_fp16 = mul(x = var_1555_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1556_cast_fp16")]; tensor k_11_cast_fp16 = add(x = var_1531_cast_fp16, y = var_1556_cast_fp16)[name = string("k_11_cast_fp16")]; tensor var_1563 = const()[name = string("op_1563"), val = tensor([1, 1024, 1, 1])]; tensor nk_3_cast_fp16 = reshape(shape = var_1563, x = k_11_cast_fp16)[name = string("nk_3_cast_fp16")]; tensor var_1569 = const()[name = string("op_1569"), val = tensor([1, 1024, 1, 1])]; tensor nv_3_cast_fp16 = reshape(shape = var_1569, x = linear_9_cast_fp16)[name = string("nv_3_cast_fp16")]; tensor var_1574_cast_fp16 = mul(x = var_1363_cast_fp16, y = var_1203_cast_fp16)[name = string("op_1574_cast_fp16")]; tensor var_1575_cast_fp16 = mul(x = nk_3_cast_fp16, y = update_mask_cast_fp16)[name = string("op_1575_cast_fp16")]; tensor lkc_7_cast_fp16 = add(x = var_1574_cast_fp16, y = var_1575_cast_fp16)[name = string("lkc_7_cast_fp16")]; tensor var_1581_cast_fp16 = mul(x = var_1383_cast_fp16, y = var_1203_cast_fp16)[name = string("op_1581_cast_fp16")]; tensor var_1582_cast_fp16 = mul(x = nv_3_cast_fp16, y = update_mask_cast_fp16)[name = string("op_1582_cast_fp16")]; tensor lvc_7_cast_fp16 = add(x = var_1581_cast_fp16, y = var_1582_cast_fp16)[name = string("lvc_7_cast_fp16")]; tensor var_1586_axes_0 = const()[name = string("op_1586_axes_0"), val = tensor([2])]; tensor var_1586_cast_fp16 = squeeze(axes = var_1586_axes_0, x = lkc_7_cast_fp16)[name = string("op_1586_cast_fp16")]; tensor var_1591 = const()[name = string("op_1591"), val = tensor([1, 8, 128, 256])]; tensor kc_5_cast_fp16 = reshape(shape = var_1591, x = var_1586_cast_fp16)[name = string("kc_5_cast_fp16")]; tensor var_1594_axes_0 = const()[name = string("op_1594_axes_0"), val = tensor([2])]; tensor var_1594_cast_fp16 = squeeze(axes = var_1594_axes_0, x = lvc_7_cast_fp16)[name = string("op_1594_cast_fp16")]; tensor var_1599 = const()[name = string("op_1599"), val = tensor([1, 8, 128, 256])]; tensor vc_5_cast_fp16 = reshape(shape = var_1599, x = var_1594_cast_fp16)[name = string("vc_5_cast_fp16")]; tensor var_1602_axes_0 = const()[name = string("op_1602_axes_0"), val = tensor([2])]; tensor var_1602_cast_fp16 = expand_dims(axes = var_1602_axes_0, x = kc_5_cast_fp16)[name = string("op_1602_cast_fp16")]; tensor var_1610_reps_0 = const()[name = string("op_1610_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1610_cast_fp16 = tile(reps = var_1610_reps_0, x = var_1602_cast_fp16)[name = string("op_1610_cast_fp16")]; tensor var_1615 = const()[name = string("op_1615"), val = tensor([1, 16, 128, 256])]; tensor kc_7_cast_fp16 = reshape(shape = var_1615, x = var_1610_cast_fp16)[name = string("kc_7_cast_fp16")]; tensor var_1618_axes_0 = const()[name = string("op_1618_axes_0"), val = tensor([2])]; tensor var_1618_cast_fp16 = expand_dims(axes = var_1618_axes_0, x = vc_5_cast_fp16)[name = string("op_1618_cast_fp16")]; tensor var_1626_reps_0 = const()[name = string("op_1626_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1626_cast_fp16 = tile(reps = var_1626_reps_0, x = var_1618_cast_fp16)[name = string("op_1626_cast_fp16")]; tensor var_1631 = const()[name = string("op_1631"), val = tensor([1, 16, 128, 256])]; tensor vc_7_cast_fp16 = reshape(shape = var_1631, x = var_1626_cast_fp16)[name = string("vc_7_cast_fp16")]; tensor var_1635_perm_0 = const()[name = string("op_1635_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_1636_transpose_x_0 = const()[name = string("op_1636_transpose_x_0"), val = bool(false)]; bool var_1636_transpose_y_0 = const()[name = string("op_1636_transpose_y_0"), val = bool(false)]; tensor var_1635_cast_fp16 = transpose(perm = var_1635_perm_0, x = q_11_cast_fp16)[name = string("transpose_53")]; tensor var_1636_cast_fp16 = matmul(transpose_x = var_1636_transpose_x_0, transpose_y = var_1636_transpose_y_0, x = var_1635_cast_fp16, y = kc_7_cast_fp16)[name = string("op_1636_cast_fp16")]; fp16 _inversed_aw_9_y_0_to_fp16 = const()[name = string("_inversed_aw_9_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_9_cast_fp16 = mul(x = var_1636_cast_fp16, y = _inversed_aw_9_y_0_to_fp16)[name = string("_inversed_aw_9_cast_fp16")]; tensor aw_11_cast_fp16 = add(x = _inversed_aw_9_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_11_cast_fp16")]; int32 var_1650 = const()[name = string("op_1650"), val = int32(-1)]; tensor aw_15_cast_fp16 = softmax(axis = var_1650, x = aw_11_cast_fp16)[name = string("aw_15_cast_fp16")]; bool var_1656_transpose_x_1 = const()[name = string("op_1656_transpose_x_1"), val = bool(false)]; bool var_1656_transpose_y_1 = const()[name = string("op_1656_transpose_y_1"), val = bool(true)]; tensor var_1656_cast_fp16 = matmul(transpose_x = var_1656_transpose_x_1, transpose_y = var_1656_transpose_y_1, x = aw_15_cast_fp16, y = vc_7_cast_fp16)[name = string("op_1656_cast_fp16")]; tensor var_1659_perm_0 = const()[name = string("op_1659_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1663 = const()[name = string("op_1663"), val = tensor([1, 1, -1])]; tensor var_1659_cast_fp16 = transpose(perm = var_1659_perm_0, x = var_1656_cast_fp16)[name = string("transpose_52")]; tensor input_13_cast_fp16 = reshape(shape = var_1663, x = var_1659_cast_fp16)[name = string("input_13_cast_fp16")]; tensor layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19949696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22046912))))[name = string("layers_1_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_1669_axes_0 = const()[name = string("op_1669_axes_0"), val = tensor([0])]; tensor var_1669_cast_fp16 = squeeze(axes = var_1669_axes_0, x = linear_10_cast_fp16)[name = string("op_1669_cast_fp16")]; tensor var_1671_axes_0 = const()[name = string("op_1671_axes_0"), val = tensor([0])]; tensor var_1671_cast_fp16 = squeeze(axes = var_1671_axes_0, x = var_1669_cast_fp16)[name = string("op_1671_cast_fp16")]; tensor var_1673_axes_0 = const()[name = string("op_1673_axes_0"), val = tensor([-1])]; tensor var_1673_cast_fp16 = expand_dims(axes = var_1673_axes_0, x = var_1671_cast_fp16)[name = string("op_1673_cast_fp16")]; tensor ao_3_axes_0 = const()[name = string("ao_3_axes_0"), val = tensor([-1])]; tensor ao_3_cast_fp16 = expand_dims(axes = ao_3_axes_0, x = var_1673_cast_fp16)[name = string("ao_3_cast_fp16")]; tensor hidden_5_cast_fp16 = add(x = hidden_3_cast_fp16, y = ao_3_cast_fp16)[name = string("hidden_5_cast_fp16")]; tensor var_1679_axes_0 = const()[name = string("op_1679_axes_0"), val = tensor([-1])]; tensor var_1679_cast_fp16 = squeeze(axes = var_1679_axes_0, x = hidden_5_cast_fp16)[name = string("op_1679_cast_fp16")]; tensor var_1681_axes_0 = const()[name = string("op_1681_axes_0"), val = tensor([-1])]; tensor var_1681_cast_fp16 = squeeze(axes = var_1681_axes_0, x = var_1679_cast_fp16)[name = string("op_1681_cast_fp16")]; tensor hidden_states_29_axes_0 = const()[name = string("hidden_states_29_axes_0"), val = tensor([0])]; tensor hidden_states_29_cast_fp16 = expand_dims(axes = hidden_states_29_axes_0, x = var_1681_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; fp16 var_1687_promoted_to_fp16 = const()[name = string("op_1687_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1693_cast_fp16 = pow(x = hidden_states_29_cast_fp16, y = var_1687_promoted_to_fp16)[name = string("op_1693_cast_fp16")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_1693_cast_fp16)[name = string("variance_15_cast_fp16")]; tensor const_20_to_fp16 = const()[name = string("const_20_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22047488)))]; tensor var_1697_cast_fp16 = mul(x = const_20_to_fp16, y = hidden_states_29_cast_fp16)[name = string("op_1697_cast_fp16")]; fp16 var_1698_to_fp16 = const()[name = string("op_1698_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1699_cast_fp16 = add(x = variance_15_cast_fp16, y = var_1698_to_fp16)[name = string("op_1699_cast_fp16")]; fp32 var_1700_epsilon_0 = const()[name = string("op_1700_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1700_cast_fp16 = rsqrt(epsilon = var_1700_epsilon_0, x = var_1699_cast_fp16)[name = string("op_1700_cast_fp16")]; tensor input_15_cast_fp16 = mul(x = var_1697_cast_fp16, y = var_1700_cast_fp16)[name = string("input_15_cast_fp16")]; tensor layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22049600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195392))))[name = string("layers_1_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_1708_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_1708_cast_fp16")]; tensor layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25195968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28341760))))[name = string("layers_1_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_1_mlp_up_proj_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_19_cast_fp16 = mul(x = var_1708_cast_fp16, y = linear_12_cast_fp16)[name = string("input_19_cast_fp16")]; tensor layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28342336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488128))))[name = string("layers_1_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor var_1715_axes_0 = const()[name = string("op_1715_axes_0"), val = tensor([0])]; tensor var_1715_cast_fp16 = squeeze(axes = var_1715_axes_0, x = linear_13_cast_fp16)[name = string("op_1715_cast_fp16")]; tensor var_1717_axes_0 = const()[name = string("op_1717_axes_0"), val = tensor([0])]; tensor var_1717_cast_fp16 = squeeze(axes = var_1717_axes_0, x = var_1715_cast_fp16)[name = string("op_1717_cast_fp16")]; tensor var_1719_axes_0 = const()[name = string("op_1719_axes_0"), val = tensor([-1])]; tensor var_1719_cast_fp16 = expand_dims(axes = var_1719_axes_0, x = var_1717_cast_fp16)[name = string("op_1719_cast_fp16")]; tensor h_3_axes_0 = const()[name = string("h_3_axes_0"), val = tensor([-1])]; tensor h_3_cast_fp16 = expand_dims(axes = h_3_axes_0, x = var_1719_cast_fp16)[name = string("h_3_cast_fp16")]; tensor hidden_7_cast_fp16 = add(x = hidden_5_cast_fp16, y = h_3_cast_fp16)[name = string("hidden_7_cast_fp16")]; tensor var_1733_begin_0 = const()[name = string("op_1733_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1733_end_0 = const()[name = string("op_1733_end_0"), val = tensor([1, 3072, 1, 256])]; tensor var_1733_end_mask_0 = const()[name = string("op_1733_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1733_cast_fp16 = slice_by_index(begin = var_1733_begin_0, end = var_1733_end_0, end_mask = var_1733_end_mask_0, x = key_cache)[name = string("op_1733_cast_fp16")]; tensor var_1753_begin_0 = const()[name = string("op_1753_begin_0"), val = tensor([0, 2048, 0, 0])]; tensor var_1753_end_0 = const()[name = string("op_1753_end_0"), val = tensor([1, 3072, 1, 256])]; tensor var_1753_end_mask_0 = const()[name = string("op_1753_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1753_cast_fp16 = slice_by_index(begin = var_1753_begin_0, end = var_1753_end_0, end_mask = var_1753_end_mask_0, x = value_cache)[name = string("op_1753_cast_fp16")]; tensor var_1765_axes_0 = const()[name = string("op_1765_axes_0"), val = tensor([-1])]; tensor var_1765_cast_fp16 = squeeze(axes = var_1765_axes_0, x = hidden_7_cast_fp16)[name = string("op_1765_cast_fp16")]; tensor var_1767_axes_0 = const()[name = string("op_1767_axes_0"), val = tensor([-1])]; tensor var_1767_cast_fp16 = squeeze(axes = var_1767_axes_0, x = var_1765_cast_fp16)[name = string("op_1767_cast_fp16")]; tensor hidden_states_33_axes_0 = const()[name = string("hidden_states_33_axes_0"), val = tensor([0])]; tensor hidden_states_33_cast_fp16 = expand_dims(axes = hidden_states_33_axes_0, x = var_1767_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; fp16 var_1773_promoted_to_fp16 = const()[name = string("op_1773_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1779_cast_fp16 = pow(x = hidden_states_33_cast_fp16, y = var_1773_promoted_to_fp16)[name = string("op_1779_cast_fp16")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_1779_cast_fp16)[name = string("variance_17_cast_fp16")]; tensor const_21_to_fp16 = const()[name = string("const_21_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31488704)))]; tensor var_1783_cast_fp16 = mul(x = const_21_to_fp16, y = hidden_states_33_cast_fp16)[name = string("op_1783_cast_fp16")]; fp16 var_1784_to_fp16 = const()[name = string("op_1784_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1785_cast_fp16 = add(x = variance_17_cast_fp16, y = var_1784_to_fp16)[name = string("op_1785_cast_fp16")]; fp32 var_1786_epsilon_0 = const()[name = string("op_1786_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1786_cast_fp16 = rsqrt(epsilon = var_1786_epsilon_0, x = var_1785_cast_fp16)[name = string("op_1786_cast_fp16")]; tensor input_21_cast_fp16 = mul(x = var_1783_cast_fp16, y = var_1786_cast_fp16)[name = string("input_21_cast_fp16")]; tensor layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31490816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588032))))[name = string("layers_2_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor var_1795 = const()[name = string("op_1795"), val = tensor([1, 1, 16, 128])]; tensor var_1796_cast_fp16 = reshape(shape = var_1795, x = linear_14_cast_fp16)[name = string("op_1796_cast_fp16")]; tensor layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33588608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637248))))[name = string("layers_2_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor var_1807 = const()[name = string("op_1807"), val = tensor([1, 1, 8, 128])]; tensor var_1808_cast_fp16 = reshape(shape = var_1807, x = linear_15_cast_fp16)[name = string("op_1808_cast_fp16")]; tensor layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34637824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35686464))))[name = string("layers_2_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("linear_16_cast_fp16")]; fp16 var_1827_promoted_to_fp16 = const()[name = string("op_1827_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1833_cast_fp16 = pow(x = var_1796_cast_fp16, y = var_1827_promoted_to_fp16)[name = string("op_1833_cast_fp16")]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor const_290 = const()[name = string("const_290"), val = tensor([3])]; tensor variance_19_cast_fp16 = reduce_mean(axes = const_290, keep_dims = variance_19_keep_dims_0, x = var_1833_cast_fp16)[name = string("variance_19_cast_fp16")]; tensor const_291_to_fp16 = const()[name = string("const_291_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687040)))]; tensor var_1837_cast_fp16 = mul(x = const_291_to_fp16, y = var_1796_cast_fp16)[name = string("op_1837_cast_fp16")]; fp16 var_1838_to_fp16 = const()[name = string("op_1838_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1839_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1838_to_fp16)[name = string("op_1839_cast_fp16")]; fp32 var_1840_epsilon_0 = const()[name = string("op_1840_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1840_cast_fp16 = rsqrt(epsilon = var_1840_epsilon_0, x = var_1839_cast_fp16)[name = string("op_1840_cast_fp16")]; tensor q_13_cast_fp16 = mul(x = var_1837_cast_fp16, y = var_1840_cast_fp16)[name = string("q_13_cast_fp16")]; fp16 var_1845_promoted_to_fp16 = const()[name = string("op_1845_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_1851_cast_fp16 = pow(x = var_1808_cast_fp16, y = var_1845_promoted_to_fp16)[name = string("op_1851_cast_fp16")]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor const_292 = const()[name = string("const_292"), val = tensor([3])]; tensor variance_21_cast_fp16 = reduce_mean(axes = const_292, keep_dims = variance_21_keep_dims_0, x = var_1851_cast_fp16)[name = string("variance_21_cast_fp16")]; tensor const_293_to_fp16 = const()[name = string("const_293_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687360)))]; tensor var_1855_cast_fp16 = mul(x = const_293_to_fp16, y = var_1808_cast_fp16)[name = string("op_1855_cast_fp16")]; fp16 var_1856_to_fp16 = const()[name = string("op_1856_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1857_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1856_to_fp16)[name = string("op_1857_cast_fp16")]; fp32 var_1858_epsilon_0 = const()[name = string("op_1858_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1858_cast_fp16 = rsqrt(epsilon = var_1858_epsilon_0, x = var_1857_cast_fp16)[name = string("op_1858_cast_fp16")]; tensor k_13_cast_fp16 = mul(x = var_1855_cast_fp16, y = var_1858_cast_fp16)[name = string("k_13_cast_fp16")]; tensor var_1873_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1873_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_13_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_13_cast_fp16)[name = string("x2_9_cast_fp16")]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1894_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_1894_cast_fp16")]; int32 var_1896 = const()[name = string("op_1896"), val = int32(-1)]; bool var_1897_interleave_0 = const()[name = string("op_1897_interleave_0"), val = bool(false)]; tensor var_1897_cast_fp16 = concat(axis = var_1896, interleave = var_1897_interleave_0, values = (var_1894_cast_fp16, x1_9_cast_fp16))[name = string("op_1897_cast_fp16")]; tensor var_1898_cast_fp16 = mul(x = var_1897_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1898_cast_fp16")]; tensor q_17_cast_fp16 = add(x = var_1873_cast_fp16, y = var_1898_cast_fp16)[name = string("q_17_cast_fp16")]; tensor var_1901_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_1_cast_fp16)[name = string("op_1901_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_13_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_13_cast_fp16)[name = string("x2_11_cast_fp16")]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1922_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_1922_cast_fp16")]; int32 var_1924 = const()[name = string("op_1924"), val = int32(-1)]; bool var_1925_interleave_0 = const()[name = string("op_1925_interleave_0"), val = bool(false)]; tensor var_1925_cast_fp16 = concat(axis = var_1924, interleave = var_1925_interleave_0, values = (var_1922_cast_fp16, x1_11_cast_fp16))[name = string("op_1925_cast_fp16")]; tensor var_1926_cast_fp16 = mul(x = var_1925_cast_fp16, y = sin_1_cast_fp16)[name = string("op_1926_cast_fp16")]; tensor k_17_cast_fp16 = add(x = var_1901_cast_fp16, y = var_1926_cast_fp16)[name = string("k_17_cast_fp16")]; tensor var_1933 = const()[name = string("op_1933"), val = tensor([1, 1024, 1, 1])]; tensor nk_5_cast_fp16 = reshape(shape = var_1933, x = k_17_cast_fp16)[name = string("nk_5_cast_fp16")]; tensor var_1939 = const()[name = string("op_1939"), val = tensor([1, 1024, 1, 1])]; tensor nv_5_cast_fp16 = reshape(shape = var_1939, x = linear_16_cast_fp16)[name = string("nv_5_cast_fp16")]; tensor var_1944_cast_fp16 = mul(x = var_1733_cast_fp16, y = var_1203_cast_fp16)[name = string("op_1944_cast_fp16")]; tensor var_1945_cast_fp16 = mul(x = nk_5_cast_fp16, y = update_mask_cast_fp16)[name = string("op_1945_cast_fp16")]; tensor lkc_11_cast_fp16 = add(x = var_1944_cast_fp16, y = var_1945_cast_fp16)[name = string("lkc_11_cast_fp16")]; tensor var_1951_cast_fp16 = mul(x = var_1753_cast_fp16, y = var_1203_cast_fp16)[name = string("op_1951_cast_fp16")]; tensor var_1952_cast_fp16 = mul(x = nv_5_cast_fp16, y = update_mask_cast_fp16)[name = string("op_1952_cast_fp16")]; tensor lvc_11_cast_fp16 = add(x = var_1951_cast_fp16, y = var_1952_cast_fp16)[name = string("lvc_11_cast_fp16")]; tensor var_1956_axes_0 = const()[name = string("op_1956_axes_0"), val = tensor([2])]; tensor var_1956_cast_fp16 = squeeze(axes = var_1956_axes_0, x = lkc_11_cast_fp16)[name = string("op_1956_cast_fp16")]; tensor var_1961 = const()[name = string("op_1961"), val = tensor([1, 8, 128, 256])]; tensor kc_9_cast_fp16 = reshape(shape = var_1961, x = var_1956_cast_fp16)[name = string("kc_9_cast_fp16")]; tensor var_1964_axes_0 = const()[name = string("op_1964_axes_0"), val = tensor([2])]; tensor var_1964_cast_fp16 = squeeze(axes = var_1964_axes_0, x = lvc_11_cast_fp16)[name = string("op_1964_cast_fp16")]; tensor var_1969 = const()[name = string("op_1969"), val = tensor([1, 8, 128, 256])]; tensor vc_9_cast_fp16 = reshape(shape = var_1969, x = var_1964_cast_fp16)[name = string("vc_9_cast_fp16")]; tensor var_1972_axes_0 = const()[name = string("op_1972_axes_0"), val = tensor([2])]; tensor var_1972_cast_fp16 = expand_dims(axes = var_1972_axes_0, x = kc_9_cast_fp16)[name = string("op_1972_cast_fp16")]; tensor var_1980_reps_0 = const()[name = string("op_1980_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1980_cast_fp16 = tile(reps = var_1980_reps_0, x = var_1972_cast_fp16)[name = string("op_1980_cast_fp16")]; tensor var_1985 = const()[name = string("op_1985"), val = tensor([1, 16, 128, 256])]; tensor kc_11_cast_fp16 = reshape(shape = var_1985, x = var_1980_cast_fp16)[name = string("kc_11_cast_fp16")]; tensor var_1988_axes_0 = const()[name = string("op_1988_axes_0"), val = tensor([2])]; tensor var_1988_cast_fp16 = expand_dims(axes = var_1988_axes_0, x = vc_9_cast_fp16)[name = string("op_1988_cast_fp16")]; tensor var_1996_reps_0 = const()[name = string("op_1996_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_1996_cast_fp16 = tile(reps = var_1996_reps_0, x = var_1988_cast_fp16)[name = string("op_1996_cast_fp16")]; tensor var_2001 = const()[name = string("op_2001"), val = tensor([1, 16, 128, 256])]; tensor vc_11_cast_fp16 = reshape(shape = var_2001, x = var_1996_cast_fp16)[name = string("vc_11_cast_fp16")]; tensor var_2005_perm_0 = const()[name = string("op_2005_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_2006_transpose_x_0 = const()[name = string("op_2006_transpose_x_0"), val = bool(false)]; bool var_2006_transpose_y_0 = const()[name = string("op_2006_transpose_y_0"), val = bool(false)]; tensor var_2005_cast_fp16 = transpose(perm = var_2005_perm_0, x = q_17_cast_fp16)[name = string("transpose_51")]; tensor var_2006_cast_fp16 = matmul(transpose_x = var_2006_transpose_x_0, transpose_y = var_2006_transpose_y_0, x = var_2005_cast_fp16, y = kc_11_cast_fp16)[name = string("op_2006_cast_fp16")]; fp16 _inversed_aw_17_y_0_to_fp16 = const()[name = string("_inversed_aw_17_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_17_cast_fp16 = mul(x = var_2006_cast_fp16, y = _inversed_aw_17_y_0_to_fp16)[name = string("_inversed_aw_17_cast_fp16")]; tensor aw_19_cast_fp16 = add(x = _inversed_aw_17_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_19_cast_fp16")]; int32 var_2020 = const()[name = string("op_2020"), val = int32(-1)]; tensor aw_23_cast_fp16 = softmax(axis = var_2020, x = aw_19_cast_fp16)[name = string("aw_23_cast_fp16")]; bool var_2026_transpose_x_1 = const()[name = string("op_2026_transpose_x_1"), val = bool(false)]; bool var_2026_transpose_y_1 = const()[name = string("op_2026_transpose_y_1"), val = bool(true)]; tensor var_2026_cast_fp16 = matmul(transpose_x = var_2026_transpose_x_1, transpose_y = var_2026_transpose_y_1, x = aw_23_cast_fp16, y = vc_11_cast_fp16)[name = string("op_2026_cast_fp16")]; tensor var_2029_perm_0 = const()[name = string("op_2029_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2033 = const()[name = string("op_2033"), val = tensor([1, 1, -1])]; tensor var_2029_cast_fp16 = transpose(perm = var_2029_perm_0, x = var_2026_cast_fp16)[name = string("transpose_50")]; tensor input_23_cast_fp16 = reshape(shape = var_2033, x = var_2029_cast_fp16)[name = string("input_23_cast_fp16")]; tensor layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35687680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37784896))))[name = string("layers_2_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor var_2039_axes_0 = const()[name = string("op_2039_axes_0"), val = tensor([0])]; tensor var_2039_cast_fp16 = squeeze(axes = var_2039_axes_0, x = linear_17_cast_fp16)[name = string("op_2039_cast_fp16")]; tensor var_2041_axes_0 = const()[name = string("op_2041_axes_0"), val = tensor([0])]; tensor var_2041_cast_fp16 = squeeze(axes = var_2041_axes_0, x = var_2039_cast_fp16)[name = string("op_2041_cast_fp16")]; tensor var_2043_axes_0 = const()[name = string("op_2043_axes_0"), val = tensor([-1])]; tensor var_2043_cast_fp16 = expand_dims(axes = var_2043_axes_0, x = var_2041_cast_fp16)[name = string("op_2043_cast_fp16")]; tensor ao_5_axes_0 = const()[name = string("ao_5_axes_0"), val = tensor([-1])]; tensor ao_5_cast_fp16 = expand_dims(axes = ao_5_axes_0, x = var_2043_cast_fp16)[name = string("ao_5_cast_fp16")]; tensor hidden_9_cast_fp16 = add(x = hidden_7_cast_fp16, y = ao_5_cast_fp16)[name = string("hidden_9_cast_fp16")]; tensor var_2049_axes_0 = const()[name = string("op_2049_axes_0"), val = tensor([-1])]; tensor var_2049_cast_fp16 = squeeze(axes = var_2049_axes_0, x = hidden_9_cast_fp16)[name = string("op_2049_cast_fp16")]; tensor var_2051_axes_0 = const()[name = string("op_2051_axes_0"), val = tensor([-1])]; tensor var_2051_cast_fp16 = squeeze(axes = var_2051_axes_0, x = var_2049_cast_fp16)[name = string("op_2051_cast_fp16")]; tensor hidden_states_45_axes_0 = const()[name = string("hidden_states_45_axes_0"), val = tensor([0])]; tensor hidden_states_45_cast_fp16 = expand_dims(axes = hidden_states_45_axes_0, x = var_2051_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; fp16 var_2057_promoted_to_fp16 = const()[name = string("op_2057_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2063_cast_fp16 = pow(x = hidden_states_45_cast_fp16, y = var_2057_promoted_to_fp16)[name = string("op_2063_cast_fp16")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_2063_cast_fp16)[name = string("variance_23_cast_fp16")]; tensor const_30_to_fp16 = const()[name = string("const_30_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37785472)))]; tensor var_2067_cast_fp16 = mul(x = const_30_to_fp16, y = hidden_states_45_cast_fp16)[name = string("op_2067_cast_fp16")]; fp16 var_2068_to_fp16 = const()[name = string("op_2068_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2069_cast_fp16 = add(x = variance_23_cast_fp16, y = var_2068_to_fp16)[name = string("op_2069_cast_fp16")]; fp32 var_2070_epsilon_0 = const()[name = string("op_2070_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2070_cast_fp16 = rsqrt(epsilon = var_2070_epsilon_0, x = var_2069_cast_fp16)[name = string("op_2070_cast_fp16")]; tensor input_25_cast_fp16 = mul(x = var_2067_cast_fp16, y = var_2070_cast_fp16)[name = string("input_25_cast_fp16")]; tensor layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37787584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933376))))[name = string("layers_2_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_2078_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_2078_cast_fp16")]; tensor layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40933952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44079744))))[name = string("layers_2_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_2_mlp_up_proj_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_29_cast_fp16 = mul(x = var_2078_cast_fp16, y = linear_19_cast_fp16)[name = string("input_29_cast_fp16")]; tensor layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44080320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226112))))[name = string("layers_2_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor var_2085_axes_0 = const()[name = string("op_2085_axes_0"), val = tensor([0])]; tensor var_2085_cast_fp16 = squeeze(axes = var_2085_axes_0, x = linear_20_cast_fp16)[name = string("op_2085_cast_fp16")]; tensor var_2087_axes_0 = const()[name = string("op_2087_axes_0"), val = tensor([0])]; tensor var_2087_cast_fp16 = squeeze(axes = var_2087_axes_0, x = var_2085_cast_fp16)[name = string("op_2087_cast_fp16")]; tensor var_2089_axes_0 = const()[name = string("op_2089_axes_0"), val = tensor([-1])]; tensor var_2089_cast_fp16 = expand_dims(axes = var_2089_axes_0, x = var_2087_cast_fp16)[name = string("op_2089_cast_fp16")]; tensor h_5_axes_0 = const()[name = string("h_5_axes_0"), val = tensor([-1])]; tensor h_5_cast_fp16 = expand_dims(axes = h_5_axes_0, x = var_2089_cast_fp16)[name = string("h_5_cast_fp16")]; tensor hidden_11_cast_fp16 = add(x = hidden_9_cast_fp16, y = h_5_cast_fp16)[name = string("hidden_11_cast_fp16")]; tensor var_2103_begin_0 = const()[name = string("op_2103_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_2103_end_0 = const()[name = string("op_2103_end_0"), val = tensor([1, 4096, 1, 256])]; tensor var_2103_end_mask_0 = const()[name = string("op_2103_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2103_cast_fp16 = slice_by_index(begin = var_2103_begin_0, end = var_2103_end_0, end_mask = var_2103_end_mask_0, x = key_cache)[name = string("op_2103_cast_fp16")]; tensor var_2123_begin_0 = const()[name = string("op_2123_begin_0"), val = tensor([0, 3072, 0, 0])]; tensor var_2123_end_0 = const()[name = string("op_2123_end_0"), val = tensor([1, 4096, 1, 256])]; tensor var_2123_end_mask_0 = const()[name = string("op_2123_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2123_cast_fp16 = slice_by_index(begin = var_2123_begin_0, end = var_2123_end_0, end_mask = var_2123_end_mask_0, x = value_cache)[name = string("op_2123_cast_fp16")]; tensor var_2135_axes_0 = const()[name = string("op_2135_axes_0"), val = tensor([-1])]; tensor var_2135_cast_fp16 = squeeze(axes = var_2135_axes_0, x = hidden_11_cast_fp16)[name = string("op_2135_cast_fp16")]; tensor var_2137_axes_0 = const()[name = string("op_2137_axes_0"), val = tensor([-1])]; tensor var_2137_cast_fp16 = squeeze(axes = var_2137_axes_0, x = var_2135_cast_fp16)[name = string("op_2137_cast_fp16")]; tensor hidden_states_49_axes_0 = const()[name = string("hidden_states_49_axes_0"), val = tensor([0])]; tensor hidden_states_49_cast_fp16 = expand_dims(axes = hidden_states_49_axes_0, x = var_2137_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; fp16 var_2143_promoted_to_fp16 = const()[name = string("op_2143_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2149_cast_fp16 = pow(x = hidden_states_49_cast_fp16, y = var_2143_promoted_to_fp16)[name = string("op_2149_cast_fp16")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_2149_cast_fp16)[name = string("variance_25_cast_fp16")]; tensor const_31_to_fp16 = const()[name = string("const_31_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47226688)))]; tensor var_2153_cast_fp16 = mul(x = const_31_to_fp16, y = hidden_states_49_cast_fp16)[name = string("op_2153_cast_fp16")]; fp16 var_2154_to_fp16 = const()[name = string("op_2154_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2155_cast_fp16 = add(x = variance_25_cast_fp16, y = var_2154_to_fp16)[name = string("op_2155_cast_fp16")]; fp32 var_2156_epsilon_0 = const()[name = string("op_2156_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2156_cast_fp16 = rsqrt(epsilon = var_2156_epsilon_0, x = var_2155_cast_fp16)[name = string("op_2156_cast_fp16")]; tensor input_31_cast_fp16 = mul(x = var_2153_cast_fp16, y = var_2156_cast_fp16)[name = string("input_31_cast_fp16")]; tensor layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47228800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326016))))[name = string("layers_3_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor var_2165 = const()[name = string("op_2165"), val = tensor([1, 1, 16, 128])]; tensor var_2166_cast_fp16 = reshape(shape = var_2165, x = linear_21_cast_fp16)[name = string("op_2166_cast_fp16")]; tensor layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49326592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375232))))[name = string("layers_3_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor var_2177 = const()[name = string("op_2177"), val = tensor([1, 1, 8, 128])]; tensor var_2178_cast_fp16 = reshape(shape = var_2177, x = linear_22_cast_fp16)[name = string("op_2178_cast_fp16")]; tensor layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50375808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51424448))))[name = string("layers_3_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("linear_23_cast_fp16")]; fp16 var_2197_promoted_to_fp16 = const()[name = string("op_2197_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2203_cast_fp16 = pow(x = var_2166_cast_fp16, y = var_2197_promoted_to_fp16)[name = string("op_2203_cast_fp16")]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor const_294 = const()[name = string("const_294"), val = tensor([3])]; tensor variance_27_cast_fp16 = reduce_mean(axes = const_294, keep_dims = variance_27_keep_dims_0, x = var_2203_cast_fp16)[name = string("variance_27_cast_fp16")]; tensor const_295_to_fp16 = const()[name = string("const_295_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425024)))]; tensor var_2207_cast_fp16 = mul(x = const_295_to_fp16, y = var_2166_cast_fp16)[name = string("op_2207_cast_fp16")]; fp16 var_2208_to_fp16 = const()[name = string("op_2208_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2209_cast_fp16 = add(x = variance_27_cast_fp16, y = var_2208_to_fp16)[name = string("op_2209_cast_fp16")]; fp32 var_2210_epsilon_0 = const()[name = string("op_2210_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2210_cast_fp16 = rsqrt(epsilon = var_2210_epsilon_0, x = var_2209_cast_fp16)[name = string("op_2210_cast_fp16")]; tensor q_19_cast_fp16 = mul(x = var_2207_cast_fp16, y = var_2210_cast_fp16)[name = string("q_19_cast_fp16")]; fp16 var_2215_promoted_to_fp16 = const()[name = string("op_2215_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2221_cast_fp16 = pow(x = var_2178_cast_fp16, y = var_2215_promoted_to_fp16)[name = string("op_2221_cast_fp16")]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor const_296 = const()[name = string("const_296"), val = tensor([3])]; tensor variance_29_cast_fp16 = reduce_mean(axes = const_296, keep_dims = variance_29_keep_dims_0, x = var_2221_cast_fp16)[name = string("variance_29_cast_fp16")]; tensor const_297_to_fp16 = const()[name = string("const_297_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425344)))]; tensor var_2225_cast_fp16 = mul(x = const_297_to_fp16, y = var_2178_cast_fp16)[name = string("op_2225_cast_fp16")]; fp16 var_2226_to_fp16 = const()[name = string("op_2226_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2227_cast_fp16 = add(x = variance_29_cast_fp16, y = var_2226_to_fp16)[name = string("op_2227_cast_fp16")]; fp32 var_2228_epsilon_0 = const()[name = string("op_2228_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2228_cast_fp16 = rsqrt(epsilon = var_2228_epsilon_0, x = var_2227_cast_fp16)[name = string("op_2228_cast_fp16")]; tensor k_19_cast_fp16 = mul(x = var_2225_cast_fp16, y = var_2228_cast_fp16)[name = string("k_19_cast_fp16")]; tensor var_2243_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2243_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_19_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_19_cast_fp16)[name = string("x2_13_cast_fp16")]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2264_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_2264_cast_fp16")]; int32 var_2266 = const()[name = string("op_2266"), val = int32(-1)]; bool var_2267_interleave_0 = const()[name = string("op_2267_interleave_0"), val = bool(false)]; tensor var_2267_cast_fp16 = concat(axis = var_2266, interleave = var_2267_interleave_0, values = (var_2264_cast_fp16, x1_13_cast_fp16))[name = string("op_2267_cast_fp16")]; tensor var_2268_cast_fp16 = mul(x = var_2267_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2268_cast_fp16")]; tensor q_23_cast_fp16 = add(x = var_2243_cast_fp16, y = var_2268_cast_fp16)[name = string("q_23_cast_fp16")]; tensor var_2271_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2271_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_19_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_19_cast_fp16)[name = string("x2_15_cast_fp16")]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2292_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_2292_cast_fp16")]; int32 var_2294 = const()[name = string("op_2294"), val = int32(-1)]; bool var_2295_interleave_0 = const()[name = string("op_2295_interleave_0"), val = bool(false)]; tensor var_2295_cast_fp16 = concat(axis = var_2294, interleave = var_2295_interleave_0, values = (var_2292_cast_fp16, x1_15_cast_fp16))[name = string("op_2295_cast_fp16")]; tensor var_2296_cast_fp16 = mul(x = var_2295_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2296_cast_fp16")]; tensor k_23_cast_fp16 = add(x = var_2271_cast_fp16, y = var_2296_cast_fp16)[name = string("k_23_cast_fp16")]; tensor var_2303 = const()[name = string("op_2303"), val = tensor([1, 1024, 1, 1])]; tensor nk_7_cast_fp16 = reshape(shape = var_2303, x = k_23_cast_fp16)[name = string("nk_7_cast_fp16")]; tensor var_2309 = const()[name = string("op_2309"), val = tensor([1, 1024, 1, 1])]; tensor nv_7_cast_fp16 = reshape(shape = var_2309, x = linear_23_cast_fp16)[name = string("nv_7_cast_fp16")]; tensor var_2314_cast_fp16 = mul(x = var_2103_cast_fp16, y = var_1203_cast_fp16)[name = string("op_2314_cast_fp16")]; tensor var_2315_cast_fp16 = mul(x = nk_7_cast_fp16, y = update_mask_cast_fp16)[name = string("op_2315_cast_fp16")]; tensor lkc_15_cast_fp16 = add(x = var_2314_cast_fp16, y = var_2315_cast_fp16)[name = string("lkc_15_cast_fp16")]; tensor var_2321_cast_fp16 = mul(x = var_2123_cast_fp16, y = var_1203_cast_fp16)[name = string("op_2321_cast_fp16")]; tensor var_2322_cast_fp16 = mul(x = nv_7_cast_fp16, y = update_mask_cast_fp16)[name = string("op_2322_cast_fp16")]; tensor lvc_15_cast_fp16 = add(x = var_2321_cast_fp16, y = var_2322_cast_fp16)[name = string("lvc_15_cast_fp16")]; tensor var_2326_axes_0 = const()[name = string("op_2326_axes_0"), val = tensor([2])]; tensor var_2326_cast_fp16 = squeeze(axes = var_2326_axes_0, x = lkc_15_cast_fp16)[name = string("op_2326_cast_fp16")]; tensor var_2331 = const()[name = string("op_2331"), val = tensor([1, 8, 128, 256])]; tensor kc_13_cast_fp16 = reshape(shape = var_2331, x = var_2326_cast_fp16)[name = string("kc_13_cast_fp16")]; tensor var_2334_axes_0 = const()[name = string("op_2334_axes_0"), val = tensor([2])]; tensor var_2334_cast_fp16 = squeeze(axes = var_2334_axes_0, x = lvc_15_cast_fp16)[name = string("op_2334_cast_fp16")]; tensor var_2339 = const()[name = string("op_2339"), val = tensor([1, 8, 128, 256])]; tensor vc_13_cast_fp16 = reshape(shape = var_2339, x = var_2334_cast_fp16)[name = string("vc_13_cast_fp16")]; tensor var_2342_axes_0 = const()[name = string("op_2342_axes_0"), val = tensor([2])]; tensor var_2342_cast_fp16 = expand_dims(axes = var_2342_axes_0, x = kc_13_cast_fp16)[name = string("op_2342_cast_fp16")]; tensor var_2350_reps_0 = const()[name = string("op_2350_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2350_cast_fp16 = tile(reps = var_2350_reps_0, x = var_2342_cast_fp16)[name = string("op_2350_cast_fp16")]; tensor var_2355 = const()[name = string("op_2355"), val = tensor([1, 16, 128, 256])]; tensor kc_15_cast_fp16 = reshape(shape = var_2355, x = var_2350_cast_fp16)[name = string("kc_15_cast_fp16")]; tensor var_2358_axes_0 = const()[name = string("op_2358_axes_0"), val = tensor([2])]; tensor var_2358_cast_fp16 = expand_dims(axes = var_2358_axes_0, x = vc_13_cast_fp16)[name = string("op_2358_cast_fp16")]; tensor var_2366_reps_0 = const()[name = string("op_2366_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2366_cast_fp16 = tile(reps = var_2366_reps_0, x = var_2358_cast_fp16)[name = string("op_2366_cast_fp16")]; tensor var_2371 = const()[name = string("op_2371"), val = tensor([1, 16, 128, 256])]; tensor vc_15_cast_fp16 = reshape(shape = var_2371, x = var_2366_cast_fp16)[name = string("vc_15_cast_fp16")]; tensor var_2375_perm_0 = const()[name = string("op_2375_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_2376_transpose_x_0 = const()[name = string("op_2376_transpose_x_0"), val = bool(false)]; bool var_2376_transpose_y_0 = const()[name = string("op_2376_transpose_y_0"), val = bool(false)]; tensor var_2375_cast_fp16 = transpose(perm = var_2375_perm_0, x = q_23_cast_fp16)[name = string("transpose_49")]; tensor var_2376_cast_fp16 = matmul(transpose_x = var_2376_transpose_x_0, transpose_y = var_2376_transpose_y_0, x = var_2375_cast_fp16, y = kc_15_cast_fp16)[name = string("op_2376_cast_fp16")]; fp16 _inversed_aw_25_y_0_to_fp16 = const()[name = string("_inversed_aw_25_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_25_cast_fp16 = mul(x = var_2376_cast_fp16, y = _inversed_aw_25_y_0_to_fp16)[name = string("_inversed_aw_25_cast_fp16")]; tensor aw_27_cast_fp16 = add(x = _inversed_aw_25_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_27_cast_fp16")]; int32 var_2390 = const()[name = string("op_2390"), val = int32(-1)]; tensor aw_31_cast_fp16 = softmax(axis = var_2390, x = aw_27_cast_fp16)[name = string("aw_31_cast_fp16")]; bool var_2396_transpose_x_1 = const()[name = string("op_2396_transpose_x_1"), val = bool(false)]; bool var_2396_transpose_y_1 = const()[name = string("op_2396_transpose_y_1"), val = bool(true)]; tensor var_2396_cast_fp16 = matmul(transpose_x = var_2396_transpose_x_1, transpose_y = var_2396_transpose_y_1, x = aw_31_cast_fp16, y = vc_15_cast_fp16)[name = string("op_2396_cast_fp16")]; tensor var_2399_perm_0 = const()[name = string("op_2399_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2403 = const()[name = string("op_2403"), val = tensor([1, 1, -1])]; tensor var_2399_cast_fp16 = transpose(perm = var_2399_perm_0, x = var_2396_cast_fp16)[name = string("transpose_48")]; tensor input_33_cast_fp16 = reshape(shape = var_2403, x = var_2399_cast_fp16)[name = string("input_33_cast_fp16")]; tensor layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51425664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53522880))))[name = string("layers_3_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_2409_axes_0 = const()[name = string("op_2409_axes_0"), val = tensor([0])]; tensor var_2409_cast_fp16 = squeeze(axes = var_2409_axes_0, x = linear_24_cast_fp16)[name = string("op_2409_cast_fp16")]; tensor var_2411_axes_0 = const()[name = string("op_2411_axes_0"), val = tensor([0])]; tensor var_2411_cast_fp16 = squeeze(axes = var_2411_axes_0, x = var_2409_cast_fp16)[name = string("op_2411_cast_fp16")]; tensor var_2413_axes_0 = const()[name = string("op_2413_axes_0"), val = tensor([-1])]; tensor var_2413_cast_fp16 = expand_dims(axes = var_2413_axes_0, x = var_2411_cast_fp16)[name = string("op_2413_cast_fp16")]; tensor ao_7_axes_0 = const()[name = string("ao_7_axes_0"), val = tensor([-1])]; tensor ao_7_cast_fp16 = expand_dims(axes = ao_7_axes_0, x = var_2413_cast_fp16)[name = string("ao_7_cast_fp16")]; tensor hidden_13_cast_fp16 = add(x = hidden_11_cast_fp16, y = ao_7_cast_fp16)[name = string("hidden_13_cast_fp16")]; tensor var_2419_axes_0 = const()[name = string("op_2419_axes_0"), val = tensor([-1])]; tensor var_2419_cast_fp16 = squeeze(axes = var_2419_axes_0, x = hidden_13_cast_fp16)[name = string("op_2419_cast_fp16")]; tensor var_2421_axes_0 = const()[name = string("op_2421_axes_0"), val = tensor([-1])]; tensor var_2421_cast_fp16 = squeeze(axes = var_2421_axes_0, x = var_2419_cast_fp16)[name = string("op_2421_cast_fp16")]; tensor hidden_states_61_axes_0 = const()[name = string("hidden_states_61_axes_0"), val = tensor([0])]; tensor hidden_states_61_cast_fp16 = expand_dims(axes = hidden_states_61_axes_0, x = var_2421_cast_fp16)[name = string("hidden_states_61_cast_fp16")]; fp16 var_2427_promoted_to_fp16 = const()[name = string("op_2427_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2433_cast_fp16 = pow(x = hidden_states_61_cast_fp16, y = var_2427_promoted_to_fp16)[name = string("op_2433_cast_fp16")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_2433_cast_fp16)[name = string("variance_31_cast_fp16")]; tensor const_40_to_fp16 = const()[name = string("const_40_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53523456)))]; tensor var_2437_cast_fp16 = mul(x = const_40_to_fp16, y = hidden_states_61_cast_fp16)[name = string("op_2437_cast_fp16")]; fp16 var_2438_to_fp16 = const()[name = string("op_2438_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2439_cast_fp16 = add(x = variance_31_cast_fp16, y = var_2438_to_fp16)[name = string("op_2439_cast_fp16")]; fp32 var_2440_epsilon_0 = const()[name = string("op_2440_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2440_cast_fp16 = rsqrt(epsilon = var_2440_epsilon_0, x = var_2439_cast_fp16)[name = string("op_2440_cast_fp16")]; tensor input_35_cast_fp16 = mul(x = var_2437_cast_fp16, y = var_2440_cast_fp16)[name = string("input_35_cast_fp16")]; tensor layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53525568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671360))))[name = string("layers_3_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_2448_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_2448_cast_fp16")]; tensor layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56671936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59817728))))[name = string("layers_3_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_3_mlp_up_proj_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_39_cast_fp16 = mul(x = var_2448_cast_fp16, y = linear_26_cast_fp16)[name = string("input_39_cast_fp16")]; tensor layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59818304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964096))))[name = string("layers_3_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor var_2455_axes_0 = const()[name = string("op_2455_axes_0"), val = tensor([0])]; tensor var_2455_cast_fp16 = squeeze(axes = var_2455_axes_0, x = linear_27_cast_fp16)[name = string("op_2455_cast_fp16")]; tensor var_2457_axes_0 = const()[name = string("op_2457_axes_0"), val = tensor([0])]; tensor var_2457_cast_fp16 = squeeze(axes = var_2457_axes_0, x = var_2455_cast_fp16)[name = string("op_2457_cast_fp16")]; tensor var_2459_axes_0 = const()[name = string("op_2459_axes_0"), val = tensor([-1])]; tensor var_2459_cast_fp16 = expand_dims(axes = var_2459_axes_0, x = var_2457_cast_fp16)[name = string("op_2459_cast_fp16")]; tensor h_7_axes_0 = const()[name = string("h_7_axes_0"), val = tensor([-1])]; tensor h_7_cast_fp16 = expand_dims(axes = h_7_axes_0, x = var_2459_cast_fp16)[name = string("h_7_cast_fp16")]; tensor hidden_15_cast_fp16 = add(x = hidden_13_cast_fp16, y = h_7_cast_fp16)[name = string("hidden_15_cast_fp16")]; tensor var_2473_begin_0 = const()[name = string("op_2473_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_2473_end_0 = const()[name = string("op_2473_end_0"), val = tensor([1, 5120, 1, 256])]; tensor var_2473_end_mask_0 = const()[name = string("op_2473_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2473_cast_fp16 = slice_by_index(begin = var_2473_begin_0, end = var_2473_end_0, end_mask = var_2473_end_mask_0, x = key_cache)[name = string("op_2473_cast_fp16")]; tensor var_2493_begin_0 = const()[name = string("op_2493_begin_0"), val = tensor([0, 4096, 0, 0])]; tensor var_2493_end_0 = const()[name = string("op_2493_end_0"), val = tensor([1, 5120, 1, 256])]; tensor var_2493_end_mask_0 = const()[name = string("op_2493_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2493_cast_fp16 = slice_by_index(begin = var_2493_begin_0, end = var_2493_end_0, end_mask = var_2493_end_mask_0, x = value_cache)[name = string("op_2493_cast_fp16")]; tensor var_2505_axes_0 = const()[name = string("op_2505_axes_0"), val = tensor([-1])]; tensor var_2505_cast_fp16 = squeeze(axes = var_2505_axes_0, x = hidden_15_cast_fp16)[name = string("op_2505_cast_fp16")]; tensor var_2507_axes_0 = const()[name = string("op_2507_axes_0"), val = tensor([-1])]; tensor var_2507_cast_fp16 = squeeze(axes = var_2507_axes_0, x = var_2505_cast_fp16)[name = string("op_2507_cast_fp16")]; tensor hidden_states_65_axes_0 = const()[name = string("hidden_states_65_axes_0"), val = tensor([0])]; tensor hidden_states_65_cast_fp16 = expand_dims(axes = hidden_states_65_axes_0, x = var_2507_cast_fp16)[name = string("hidden_states_65_cast_fp16")]; fp16 var_2513_promoted_to_fp16 = const()[name = string("op_2513_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2519_cast_fp16 = pow(x = hidden_states_65_cast_fp16, y = var_2513_promoted_to_fp16)[name = string("op_2519_cast_fp16")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_2519_cast_fp16)[name = string("variance_33_cast_fp16")]; tensor const_41_to_fp16 = const()[name = string("const_41_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62964672)))]; tensor var_2523_cast_fp16 = mul(x = const_41_to_fp16, y = hidden_states_65_cast_fp16)[name = string("op_2523_cast_fp16")]; fp16 var_2524_to_fp16 = const()[name = string("op_2524_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2525_cast_fp16 = add(x = variance_33_cast_fp16, y = var_2524_to_fp16)[name = string("op_2525_cast_fp16")]; fp32 var_2526_epsilon_0 = const()[name = string("op_2526_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2526_cast_fp16 = rsqrt(epsilon = var_2526_epsilon_0, x = var_2525_cast_fp16)[name = string("op_2526_cast_fp16")]; tensor input_41_cast_fp16 = mul(x = var_2523_cast_fp16, y = var_2526_cast_fp16)[name = string("input_41_cast_fp16")]; tensor layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62966784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064000))))[name = string("layers_4_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor var_2535 = const()[name = string("op_2535"), val = tensor([1, 1, 16, 128])]; tensor var_2536_cast_fp16 = reshape(shape = var_2535, x = linear_28_cast_fp16)[name = string("op_2536_cast_fp16")]; tensor layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65064576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113216))))[name = string("layers_4_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor var_2547 = const()[name = string("op_2547"), val = tensor([1, 1, 8, 128])]; tensor var_2548_cast_fp16 = reshape(shape = var_2547, x = linear_29_cast_fp16)[name = string("op_2548_cast_fp16")]; tensor layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66113792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67162432))))[name = string("layers_4_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("linear_30_cast_fp16")]; fp16 var_2567_promoted_to_fp16 = const()[name = string("op_2567_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2573_cast_fp16 = pow(x = var_2536_cast_fp16, y = var_2567_promoted_to_fp16)[name = string("op_2573_cast_fp16")]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor const_298 = const()[name = string("const_298"), val = tensor([3])]; tensor variance_35_cast_fp16 = reduce_mean(axes = const_298, keep_dims = variance_35_keep_dims_0, x = var_2573_cast_fp16)[name = string("variance_35_cast_fp16")]; tensor const_299_to_fp16 = const()[name = string("const_299_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163008)))]; tensor var_2577_cast_fp16 = mul(x = const_299_to_fp16, y = var_2536_cast_fp16)[name = string("op_2577_cast_fp16")]; fp16 var_2578_to_fp16 = const()[name = string("op_2578_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2579_cast_fp16 = add(x = variance_35_cast_fp16, y = var_2578_to_fp16)[name = string("op_2579_cast_fp16")]; fp32 var_2580_epsilon_0 = const()[name = string("op_2580_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2580_cast_fp16 = rsqrt(epsilon = var_2580_epsilon_0, x = var_2579_cast_fp16)[name = string("op_2580_cast_fp16")]; tensor q_25_cast_fp16 = mul(x = var_2577_cast_fp16, y = var_2580_cast_fp16)[name = string("q_25_cast_fp16")]; fp16 var_2585_promoted_to_fp16 = const()[name = string("op_2585_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2591_cast_fp16 = pow(x = var_2548_cast_fp16, y = var_2585_promoted_to_fp16)[name = string("op_2591_cast_fp16")]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor const_300 = const()[name = string("const_300"), val = tensor([3])]; tensor variance_37_cast_fp16 = reduce_mean(axes = const_300, keep_dims = variance_37_keep_dims_0, x = var_2591_cast_fp16)[name = string("variance_37_cast_fp16")]; tensor const_301_to_fp16 = const()[name = string("const_301_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163328)))]; tensor var_2595_cast_fp16 = mul(x = const_301_to_fp16, y = var_2548_cast_fp16)[name = string("op_2595_cast_fp16")]; fp16 var_2596_to_fp16 = const()[name = string("op_2596_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2597_cast_fp16 = add(x = variance_37_cast_fp16, y = var_2596_to_fp16)[name = string("op_2597_cast_fp16")]; fp32 var_2598_epsilon_0 = const()[name = string("op_2598_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2598_cast_fp16 = rsqrt(epsilon = var_2598_epsilon_0, x = var_2597_cast_fp16)[name = string("op_2598_cast_fp16")]; tensor k_25_cast_fp16 = mul(x = var_2595_cast_fp16, y = var_2598_cast_fp16)[name = string("k_25_cast_fp16")]; tensor var_2613_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2613_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_25_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_25_cast_fp16)[name = string("x2_17_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2634_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_2634_cast_fp16")]; int32 var_2636 = const()[name = string("op_2636"), val = int32(-1)]; bool var_2637_interleave_0 = const()[name = string("op_2637_interleave_0"), val = bool(false)]; tensor var_2637_cast_fp16 = concat(axis = var_2636, interleave = var_2637_interleave_0, values = (var_2634_cast_fp16, x1_17_cast_fp16))[name = string("op_2637_cast_fp16")]; tensor var_2638_cast_fp16 = mul(x = var_2637_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2638_cast_fp16")]; tensor q_29_cast_fp16 = add(x = var_2613_cast_fp16, y = var_2638_cast_fp16)[name = string("q_29_cast_fp16")]; tensor var_2641_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2641_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_25_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_25_cast_fp16)[name = string("x2_19_cast_fp16")]; fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2662_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_2662_cast_fp16")]; int32 var_2664 = const()[name = string("op_2664"), val = int32(-1)]; bool var_2665_interleave_0 = const()[name = string("op_2665_interleave_0"), val = bool(false)]; tensor var_2665_cast_fp16 = concat(axis = var_2664, interleave = var_2665_interleave_0, values = (var_2662_cast_fp16, x1_19_cast_fp16))[name = string("op_2665_cast_fp16")]; tensor var_2666_cast_fp16 = mul(x = var_2665_cast_fp16, y = sin_1_cast_fp16)[name = string("op_2666_cast_fp16")]; tensor k_29_cast_fp16 = add(x = var_2641_cast_fp16, y = var_2666_cast_fp16)[name = string("k_29_cast_fp16")]; tensor var_2673 = const()[name = string("op_2673"), val = tensor([1, 1024, 1, 1])]; tensor nk_9_cast_fp16 = reshape(shape = var_2673, x = k_29_cast_fp16)[name = string("nk_9_cast_fp16")]; tensor var_2679 = const()[name = string("op_2679"), val = tensor([1, 1024, 1, 1])]; tensor nv_9_cast_fp16 = reshape(shape = var_2679, x = linear_30_cast_fp16)[name = string("nv_9_cast_fp16")]; tensor var_2684_cast_fp16 = mul(x = var_2473_cast_fp16, y = var_1203_cast_fp16)[name = string("op_2684_cast_fp16")]; tensor var_2685_cast_fp16 = mul(x = nk_9_cast_fp16, y = update_mask_cast_fp16)[name = string("op_2685_cast_fp16")]; tensor lkc_19_cast_fp16 = add(x = var_2684_cast_fp16, y = var_2685_cast_fp16)[name = string("lkc_19_cast_fp16")]; tensor var_2691_cast_fp16 = mul(x = var_2493_cast_fp16, y = var_1203_cast_fp16)[name = string("op_2691_cast_fp16")]; tensor var_2692_cast_fp16 = mul(x = nv_9_cast_fp16, y = update_mask_cast_fp16)[name = string("op_2692_cast_fp16")]; tensor lvc_19_cast_fp16 = add(x = var_2691_cast_fp16, y = var_2692_cast_fp16)[name = string("lvc_19_cast_fp16")]; tensor var_2696_axes_0 = const()[name = string("op_2696_axes_0"), val = tensor([2])]; tensor var_2696_cast_fp16 = squeeze(axes = var_2696_axes_0, x = lkc_19_cast_fp16)[name = string("op_2696_cast_fp16")]; tensor var_2701 = const()[name = string("op_2701"), val = tensor([1, 8, 128, 256])]; tensor kc_17_cast_fp16 = reshape(shape = var_2701, x = var_2696_cast_fp16)[name = string("kc_17_cast_fp16")]; tensor var_2704_axes_0 = const()[name = string("op_2704_axes_0"), val = tensor([2])]; tensor var_2704_cast_fp16 = squeeze(axes = var_2704_axes_0, x = lvc_19_cast_fp16)[name = string("op_2704_cast_fp16")]; tensor var_2709 = const()[name = string("op_2709"), val = tensor([1, 8, 128, 256])]; tensor vc_17_cast_fp16 = reshape(shape = var_2709, x = var_2704_cast_fp16)[name = string("vc_17_cast_fp16")]; tensor var_2712_axes_0 = const()[name = string("op_2712_axes_0"), val = tensor([2])]; tensor var_2712_cast_fp16 = expand_dims(axes = var_2712_axes_0, x = kc_17_cast_fp16)[name = string("op_2712_cast_fp16")]; tensor var_2720_reps_0 = const()[name = string("op_2720_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2720_cast_fp16 = tile(reps = var_2720_reps_0, x = var_2712_cast_fp16)[name = string("op_2720_cast_fp16")]; tensor var_2725 = const()[name = string("op_2725"), val = tensor([1, 16, 128, 256])]; tensor kc_19_cast_fp16 = reshape(shape = var_2725, x = var_2720_cast_fp16)[name = string("kc_19_cast_fp16")]; tensor var_2728_axes_0 = const()[name = string("op_2728_axes_0"), val = tensor([2])]; tensor var_2728_cast_fp16 = expand_dims(axes = var_2728_axes_0, x = vc_17_cast_fp16)[name = string("op_2728_cast_fp16")]; tensor var_2736_reps_0 = const()[name = string("op_2736_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_2736_cast_fp16 = tile(reps = var_2736_reps_0, x = var_2728_cast_fp16)[name = string("op_2736_cast_fp16")]; tensor var_2741 = const()[name = string("op_2741"), val = tensor([1, 16, 128, 256])]; tensor vc_19_cast_fp16 = reshape(shape = var_2741, x = var_2736_cast_fp16)[name = string("vc_19_cast_fp16")]; tensor var_2745_perm_0 = const()[name = string("op_2745_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_2746_transpose_x_0 = const()[name = string("op_2746_transpose_x_0"), val = bool(false)]; bool var_2746_transpose_y_0 = const()[name = string("op_2746_transpose_y_0"), val = bool(false)]; tensor var_2745_cast_fp16 = transpose(perm = var_2745_perm_0, x = q_29_cast_fp16)[name = string("transpose_47")]; tensor var_2746_cast_fp16 = matmul(transpose_x = var_2746_transpose_x_0, transpose_y = var_2746_transpose_y_0, x = var_2745_cast_fp16, y = kc_19_cast_fp16)[name = string("op_2746_cast_fp16")]; fp16 _inversed_aw_33_y_0_to_fp16 = const()[name = string("_inversed_aw_33_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_33_cast_fp16 = mul(x = var_2746_cast_fp16, y = _inversed_aw_33_y_0_to_fp16)[name = string("_inversed_aw_33_cast_fp16")]; tensor aw_35_cast_fp16 = add(x = _inversed_aw_33_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_35_cast_fp16")]; int32 var_2760 = const()[name = string("op_2760"), val = int32(-1)]; tensor aw_39_cast_fp16 = softmax(axis = var_2760, x = aw_35_cast_fp16)[name = string("aw_39_cast_fp16")]; bool var_2766_transpose_x_1 = const()[name = string("op_2766_transpose_x_1"), val = bool(false)]; bool var_2766_transpose_y_1 = const()[name = string("op_2766_transpose_y_1"), val = bool(true)]; tensor var_2766_cast_fp16 = matmul(transpose_x = var_2766_transpose_x_1, transpose_y = var_2766_transpose_y_1, x = aw_39_cast_fp16, y = vc_19_cast_fp16)[name = string("op_2766_cast_fp16")]; tensor var_2769_perm_0 = const()[name = string("op_2769_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2773 = const()[name = string("op_2773"), val = tensor([1, 1, -1])]; tensor var_2769_cast_fp16 = transpose(perm = var_2769_perm_0, x = var_2766_cast_fp16)[name = string("transpose_46")]; tensor input_43_cast_fp16 = reshape(shape = var_2773, x = var_2769_cast_fp16)[name = string("input_43_cast_fp16")]; tensor layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67163648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69260864))))[name = string("layers_4_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor var_2779_axes_0 = const()[name = string("op_2779_axes_0"), val = tensor([0])]; tensor var_2779_cast_fp16 = squeeze(axes = var_2779_axes_0, x = linear_31_cast_fp16)[name = string("op_2779_cast_fp16")]; tensor var_2781_axes_0 = const()[name = string("op_2781_axes_0"), val = tensor([0])]; tensor var_2781_cast_fp16 = squeeze(axes = var_2781_axes_0, x = var_2779_cast_fp16)[name = string("op_2781_cast_fp16")]; tensor var_2783_axes_0 = const()[name = string("op_2783_axes_0"), val = tensor([-1])]; tensor var_2783_cast_fp16 = expand_dims(axes = var_2783_axes_0, x = var_2781_cast_fp16)[name = string("op_2783_cast_fp16")]; tensor ao_9_axes_0 = const()[name = string("ao_9_axes_0"), val = tensor([-1])]; tensor ao_9_cast_fp16 = expand_dims(axes = ao_9_axes_0, x = var_2783_cast_fp16)[name = string("ao_9_cast_fp16")]; tensor hidden_17_cast_fp16 = add(x = hidden_15_cast_fp16, y = ao_9_cast_fp16)[name = string("hidden_17_cast_fp16")]; tensor var_2789_axes_0 = const()[name = string("op_2789_axes_0"), val = tensor([-1])]; tensor var_2789_cast_fp16 = squeeze(axes = var_2789_axes_0, x = hidden_17_cast_fp16)[name = string("op_2789_cast_fp16")]; tensor var_2791_axes_0 = const()[name = string("op_2791_axes_0"), val = tensor([-1])]; tensor var_2791_cast_fp16 = squeeze(axes = var_2791_axes_0, x = var_2789_cast_fp16)[name = string("op_2791_cast_fp16")]; tensor hidden_states_77_axes_0 = const()[name = string("hidden_states_77_axes_0"), val = tensor([0])]; tensor hidden_states_77_cast_fp16 = expand_dims(axes = hidden_states_77_axes_0, x = var_2791_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; fp16 var_2797_promoted_to_fp16 = const()[name = string("op_2797_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2803_cast_fp16 = pow(x = hidden_states_77_cast_fp16, y = var_2797_promoted_to_fp16)[name = string("op_2803_cast_fp16")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_2803_cast_fp16)[name = string("variance_39_cast_fp16")]; tensor const_50_to_fp16 = const()[name = string("const_50_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69261440)))]; tensor var_2807_cast_fp16 = mul(x = const_50_to_fp16, y = hidden_states_77_cast_fp16)[name = string("op_2807_cast_fp16")]; fp16 var_2808_to_fp16 = const()[name = string("op_2808_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2809_cast_fp16 = add(x = variance_39_cast_fp16, y = var_2808_to_fp16)[name = string("op_2809_cast_fp16")]; fp32 var_2810_epsilon_0 = const()[name = string("op_2810_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2810_cast_fp16 = rsqrt(epsilon = var_2810_epsilon_0, x = var_2809_cast_fp16)[name = string("op_2810_cast_fp16")]; tensor input_45_cast_fp16 = mul(x = var_2807_cast_fp16, y = var_2810_cast_fp16)[name = string("input_45_cast_fp16")]; tensor layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69263552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409344))))[name = string("layers_4_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_2818_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_2818_cast_fp16")]; tensor layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72409920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75555712))))[name = string("layers_4_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_4_mlp_up_proj_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_49_cast_fp16 = mul(x = var_2818_cast_fp16, y = linear_33_cast_fp16)[name = string("input_49_cast_fp16")]; tensor layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702080))))[name = string("layers_4_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_2825_axes_0 = const()[name = string("op_2825_axes_0"), val = tensor([0])]; tensor var_2825_cast_fp16 = squeeze(axes = var_2825_axes_0, x = linear_34_cast_fp16)[name = string("op_2825_cast_fp16")]; tensor var_2827_axes_0 = const()[name = string("op_2827_axes_0"), val = tensor([0])]; tensor var_2827_cast_fp16 = squeeze(axes = var_2827_axes_0, x = var_2825_cast_fp16)[name = string("op_2827_cast_fp16")]; tensor var_2829_axes_0 = const()[name = string("op_2829_axes_0"), val = tensor([-1])]; tensor var_2829_cast_fp16 = expand_dims(axes = var_2829_axes_0, x = var_2827_cast_fp16)[name = string("op_2829_cast_fp16")]; tensor h_9_axes_0 = const()[name = string("h_9_axes_0"), val = tensor([-1])]; tensor h_9_cast_fp16 = expand_dims(axes = h_9_axes_0, x = var_2829_cast_fp16)[name = string("h_9_cast_fp16")]; tensor hidden_19_cast_fp16 = add(x = hidden_17_cast_fp16, y = h_9_cast_fp16)[name = string("hidden_19_cast_fp16")]; tensor var_2843_begin_0 = const()[name = string("op_2843_begin_0"), val = tensor([0, 5120, 0, 0])]; tensor var_2843_end_0 = const()[name = string("op_2843_end_0"), val = tensor([1, 6144, 1, 256])]; tensor var_2843_end_mask_0 = const()[name = string("op_2843_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2843_cast_fp16 = slice_by_index(begin = var_2843_begin_0, end = var_2843_end_0, end_mask = var_2843_end_mask_0, x = key_cache)[name = string("op_2843_cast_fp16")]; tensor var_2863_begin_0 = const()[name = string("op_2863_begin_0"), val = tensor([0, 5120, 0, 0])]; tensor var_2863_end_0 = const()[name = string("op_2863_end_0"), val = tensor([1, 6144, 1, 256])]; tensor var_2863_end_mask_0 = const()[name = string("op_2863_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2863_cast_fp16 = slice_by_index(begin = var_2863_begin_0, end = var_2863_end_0, end_mask = var_2863_end_mask_0, x = value_cache)[name = string("op_2863_cast_fp16")]; tensor var_2875_axes_0 = const()[name = string("op_2875_axes_0"), val = tensor([-1])]; tensor var_2875_cast_fp16 = squeeze(axes = var_2875_axes_0, x = hidden_19_cast_fp16)[name = string("op_2875_cast_fp16")]; tensor var_2877_axes_0 = const()[name = string("op_2877_axes_0"), val = tensor([-1])]; tensor var_2877_cast_fp16 = squeeze(axes = var_2877_axes_0, x = var_2875_cast_fp16)[name = string("op_2877_cast_fp16")]; tensor hidden_states_81_axes_0 = const()[name = string("hidden_states_81_axes_0"), val = tensor([0])]; tensor hidden_states_81_cast_fp16 = expand_dims(axes = hidden_states_81_axes_0, x = var_2877_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; fp16 var_2883_promoted_to_fp16 = const()[name = string("op_2883_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2889_cast_fp16 = pow(x = hidden_states_81_cast_fp16, y = var_2883_promoted_to_fp16)[name = string("op_2889_cast_fp16")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_2889_cast_fp16)[name = string("variance_41_cast_fp16")]; tensor const_51_to_fp16 = const()[name = string("const_51_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78702656)))]; tensor var_2893_cast_fp16 = mul(x = const_51_to_fp16, y = hidden_states_81_cast_fp16)[name = string("op_2893_cast_fp16")]; fp16 var_2894_to_fp16 = const()[name = string("op_2894_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2895_cast_fp16 = add(x = variance_41_cast_fp16, y = var_2894_to_fp16)[name = string("op_2895_cast_fp16")]; fp32 var_2896_epsilon_0 = const()[name = string("op_2896_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2896_cast_fp16 = rsqrt(epsilon = var_2896_epsilon_0, x = var_2895_cast_fp16)[name = string("op_2896_cast_fp16")]; tensor input_51_cast_fp16 = mul(x = var_2893_cast_fp16, y = var_2896_cast_fp16)[name = string("input_51_cast_fp16")]; tensor layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78704768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80801984))))[name = string("layers_5_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor var_2905 = const()[name = string("op_2905"), val = tensor([1, 1, 16, 128])]; tensor var_2906_cast_fp16 = reshape(shape = var_2905, x = linear_35_cast_fp16)[name = string("op_2906_cast_fp16")]; tensor layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80802560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851200))))[name = string("layers_5_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor var_2917 = const()[name = string("op_2917"), val = tensor([1, 1, 8, 128])]; tensor var_2918_cast_fp16 = reshape(shape = var_2917, x = linear_36_cast_fp16)[name = string("op_2918_cast_fp16")]; tensor layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81851776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900416))))[name = string("layers_5_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("linear_37_cast_fp16")]; fp16 var_2937_promoted_to_fp16 = const()[name = string("op_2937_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2943_cast_fp16 = pow(x = var_2906_cast_fp16, y = var_2937_promoted_to_fp16)[name = string("op_2943_cast_fp16")]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; tensor const_302 = const()[name = string("const_302"), val = tensor([3])]; tensor variance_43_cast_fp16 = reduce_mean(axes = const_302, keep_dims = variance_43_keep_dims_0, x = var_2943_cast_fp16)[name = string("variance_43_cast_fp16")]; tensor const_303_to_fp16 = const()[name = string("const_303_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82900992)))]; tensor var_2947_cast_fp16 = mul(x = const_303_to_fp16, y = var_2906_cast_fp16)[name = string("op_2947_cast_fp16")]; fp16 var_2948_to_fp16 = const()[name = string("op_2948_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2949_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2948_to_fp16)[name = string("op_2949_cast_fp16")]; fp32 var_2950_epsilon_0 = const()[name = string("op_2950_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2950_cast_fp16 = rsqrt(epsilon = var_2950_epsilon_0, x = var_2949_cast_fp16)[name = string("op_2950_cast_fp16")]; tensor q_31_cast_fp16 = mul(x = var_2947_cast_fp16, y = var_2950_cast_fp16)[name = string("q_31_cast_fp16")]; fp16 var_2955_promoted_to_fp16 = const()[name = string("op_2955_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_2961_cast_fp16 = pow(x = var_2918_cast_fp16, y = var_2955_promoted_to_fp16)[name = string("op_2961_cast_fp16")]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; tensor const_304 = const()[name = string("const_304"), val = tensor([3])]; tensor variance_45_cast_fp16 = reduce_mean(axes = const_304, keep_dims = variance_45_keep_dims_0, x = var_2961_cast_fp16)[name = string("variance_45_cast_fp16")]; tensor const_305_to_fp16 = const()[name = string("const_305_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901312)))]; tensor var_2965_cast_fp16 = mul(x = const_305_to_fp16, y = var_2918_cast_fp16)[name = string("op_2965_cast_fp16")]; fp16 var_2966_to_fp16 = const()[name = string("op_2966_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2967_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2966_to_fp16)[name = string("op_2967_cast_fp16")]; fp32 var_2968_epsilon_0 = const()[name = string("op_2968_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2968_cast_fp16 = rsqrt(epsilon = var_2968_epsilon_0, x = var_2967_cast_fp16)[name = string("op_2968_cast_fp16")]; tensor k_31_cast_fp16 = mul(x = var_2965_cast_fp16, y = var_2968_cast_fp16)[name = string("k_31_cast_fp16")]; tensor var_2983_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_2983_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_31_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_31_cast_fp16)[name = string("x2_21_cast_fp16")]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3004_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_3004_cast_fp16")]; int32 var_3006 = const()[name = string("op_3006"), val = int32(-1)]; bool var_3007_interleave_0 = const()[name = string("op_3007_interleave_0"), val = bool(false)]; tensor var_3007_cast_fp16 = concat(axis = var_3006, interleave = var_3007_interleave_0, values = (var_3004_cast_fp16, x1_21_cast_fp16))[name = string("op_3007_cast_fp16")]; tensor var_3008_cast_fp16 = mul(x = var_3007_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3008_cast_fp16")]; tensor q_35_cast_fp16 = add(x = var_2983_cast_fp16, y = var_3008_cast_fp16)[name = string("q_35_cast_fp16")]; tensor var_3011_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3011_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_31_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_31_cast_fp16)[name = string("x2_23_cast_fp16")]; fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3032_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_3032_cast_fp16")]; int32 var_3034 = const()[name = string("op_3034"), val = int32(-1)]; bool var_3035_interleave_0 = const()[name = string("op_3035_interleave_0"), val = bool(false)]; tensor var_3035_cast_fp16 = concat(axis = var_3034, interleave = var_3035_interleave_0, values = (var_3032_cast_fp16, x1_23_cast_fp16))[name = string("op_3035_cast_fp16")]; tensor var_3036_cast_fp16 = mul(x = var_3035_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3036_cast_fp16")]; tensor k_35_cast_fp16 = add(x = var_3011_cast_fp16, y = var_3036_cast_fp16)[name = string("k_35_cast_fp16")]; tensor var_3043 = const()[name = string("op_3043"), val = tensor([1, 1024, 1, 1])]; tensor nk_11_cast_fp16 = reshape(shape = var_3043, x = k_35_cast_fp16)[name = string("nk_11_cast_fp16")]; tensor var_3049 = const()[name = string("op_3049"), val = tensor([1, 1024, 1, 1])]; tensor nv_11_cast_fp16 = reshape(shape = var_3049, x = linear_37_cast_fp16)[name = string("nv_11_cast_fp16")]; tensor var_3054_cast_fp16 = mul(x = var_2843_cast_fp16, y = var_1203_cast_fp16)[name = string("op_3054_cast_fp16")]; tensor var_3055_cast_fp16 = mul(x = nk_11_cast_fp16, y = update_mask_cast_fp16)[name = string("op_3055_cast_fp16")]; tensor lkc_23_cast_fp16 = add(x = var_3054_cast_fp16, y = var_3055_cast_fp16)[name = string("lkc_23_cast_fp16")]; tensor var_3061_cast_fp16 = mul(x = var_2863_cast_fp16, y = var_1203_cast_fp16)[name = string("op_3061_cast_fp16")]; tensor var_3062_cast_fp16 = mul(x = nv_11_cast_fp16, y = update_mask_cast_fp16)[name = string("op_3062_cast_fp16")]; tensor lvc_23_cast_fp16 = add(x = var_3061_cast_fp16, y = var_3062_cast_fp16)[name = string("lvc_23_cast_fp16")]; tensor var_3066_axes_0 = const()[name = string("op_3066_axes_0"), val = tensor([2])]; tensor var_3066_cast_fp16 = squeeze(axes = var_3066_axes_0, x = lkc_23_cast_fp16)[name = string("op_3066_cast_fp16")]; tensor var_3071 = const()[name = string("op_3071"), val = tensor([1, 8, 128, 256])]; tensor kc_21_cast_fp16 = reshape(shape = var_3071, x = var_3066_cast_fp16)[name = string("kc_21_cast_fp16")]; tensor var_3074_axes_0 = const()[name = string("op_3074_axes_0"), val = tensor([2])]; tensor var_3074_cast_fp16 = squeeze(axes = var_3074_axes_0, x = lvc_23_cast_fp16)[name = string("op_3074_cast_fp16")]; tensor var_3079 = const()[name = string("op_3079"), val = tensor([1, 8, 128, 256])]; tensor vc_21_cast_fp16 = reshape(shape = var_3079, x = var_3074_cast_fp16)[name = string("vc_21_cast_fp16")]; tensor var_3082_axes_0 = const()[name = string("op_3082_axes_0"), val = tensor([2])]; tensor var_3082_cast_fp16 = expand_dims(axes = var_3082_axes_0, x = kc_21_cast_fp16)[name = string("op_3082_cast_fp16")]; tensor var_3090_reps_0 = const()[name = string("op_3090_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3090_cast_fp16 = tile(reps = var_3090_reps_0, x = var_3082_cast_fp16)[name = string("op_3090_cast_fp16")]; tensor var_3095 = const()[name = string("op_3095"), val = tensor([1, 16, 128, 256])]; tensor kc_23_cast_fp16 = reshape(shape = var_3095, x = var_3090_cast_fp16)[name = string("kc_23_cast_fp16")]; tensor var_3098_axes_0 = const()[name = string("op_3098_axes_0"), val = tensor([2])]; tensor var_3098_cast_fp16 = expand_dims(axes = var_3098_axes_0, x = vc_21_cast_fp16)[name = string("op_3098_cast_fp16")]; tensor var_3106_reps_0 = const()[name = string("op_3106_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3106_cast_fp16 = tile(reps = var_3106_reps_0, x = var_3098_cast_fp16)[name = string("op_3106_cast_fp16")]; tensor var_3111 = const()[name = string("op_3111"), val = tensor([1, 16, 128, 256])]; tensor vc_23_cast_fp16 = reshape(shape = var_3111, x = var_3106_cast_fp16)[name = string("vc_23_cast_fp16")]; tensor var_3115_perm_0 = const()[name = string("op_3115_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_3116_transpose_x_0 = const()[name = string("op_3116_transpose_x_0"), val = bool(false)]; bool var_3116_transpose_y_0 = const()[name = string("op_3116_transpose_y_0"), val = bool(false)]; tensor var_3115_cast_fp16 = transpose(perm = var_3115_perm_0, x = q_35_cast_fp16)[name = string("transpose_45")]; tensor var_3116_cast_fp16 = matmul(transpose_x = var_3116_transpose_x_0, transpose_y = var_3116_transpose_y_0, x = var_3115_cast_fp16, y = kc_23_cast_fp16)[name = string("op_3116_cast_fp16")]; fp16 _inversed_aw_41_y_0_to_fp16 = const()[name = string("_inversed_aw_41_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_41_cast_fp16 = mul(x = var_3116_cast_fp16, y = _inversed_aw_41_y_0_to_fp16)[name = string("_inversed_aw_41_cast_fp16")]; tensor aw_43_cast_fp16 = add(x = _inversed_aw_41_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_43_cast_fp16")]; int32 var_3130 = const()[name = string("op_3130"), val = int32(-1)]; tensor aw_47_cast_fp16 = softmax(axis = var_3130, x = aw_43_cast_fp16)[name = string("aw_47_cast_fp16")]; bool var_3136_transpose_x_1 = const()[name = string("op_3136_transpose_x_1"), val = bool(false)]; bool var_3136_transpose_y_1 = const()[name = string("op_3136_transpose_y_1"), val = bool(true)]; tensor var_3136_cast_fp16 = matmul(transpose_x = var_3136_transpose_x_1, transpose_y = var_3136_transpose_y_1, x = aw_47_cast_fp16, y = vc_23_cast_fp16)[name = string("op_3136_cast_fp16")]; tensor var_3139_perm_0 = const()[name = string("op_3139_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3143 = const()[name = string("op_3143"), val = tensor([1, 1, -1])]; tensor var_3139_cast_fp16 = transpose(perm = var_3139_perm_0, x = var_3136_cast_fp16)[name = string("transpose_44")]; tensor input_53_cast_fp16 = reshape(shape = var_3143, x = var_3139_cast_fp16)[name = string("input_53_cast_fp16")]; tensor layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82901632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84998848))))[name = string("layers_5_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor var_3149_axes_0 = const()[name = string("op_3149_axes_0"), val = tensor([0])]; tensor var_3149_cast_fp16 = squeeze(axes = var_3149_axes_0, x = linear_38_cast_fp16)[name = string("op_3149_cast_fp16")]; tensor var_3151_axes_0 = const()[name = string("op_3151_axes_0"), val = tensor([0])]; tensor var_3151_cast_fp16 = squeeze(axes = var_3151_axes_0, x = var_3149_cast_fp16)[name = string("op_3151_cast_fp16")]; tensor var_3153_axes_0 = const()[name = string("op_3153_axes_0"), val = tensor([-1])]; tensor var_3153_cast_fp16 = expand_dims(axes = var_3153_axes_0, x = var_3151_cast_fp16)[name = string("op_3153_cast_fp16")]; tensor ao_11_axes_0 = const()[name = string("ao_11_axes_0"), val = tensor([-1])]; tensor ao_11_cast_fp16 = expand_dims(axes = ao_11_axes_0, x = var_3153_cast_fp16)[name = string("ao_11_cast_fp16")]; tensor hidden_21_cast_fp16 = add(x = hidden_19_cast_fp16, y = ao_11_cast_fp16)[name = string("hidden_21_cast_fp16")]; tensor var_3159_axes_0 = const()[name = string("op_3159_axes_0"), val = tensor([-1])]; tensor var_3159_cast_fp16 = squeeze(axes = var_3159_axes_0, x = hidden_21_cast_fp16)[name = string("op_3159_cast_fp16")]; tensor var_3161_axes_0 = const()[name = string("op_3161_axes_0"), val = tensor([-1])]; tensor var_3161_cast_fp16 = squeeze(axes = var_3161_axes_0, x = var_3159_cast_fp16)[name = string("op_3161_cast_fp16")]; tensor hidden_states_93_axes_0 = const()[name = string("hidden_states_93_axes_0"), val = tensor([0])]; tensor hidden_states_93_cast_fp16 = expand_dims(axes = hidden_states_93_axes_0, x = var_3161_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; fp16 var_3167_promoted_to_fp16 = const()[name = string("op_3167_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3173_cast_fp16 = pow(x = hidden_states_93_cast_fp16, y = var_3167_promoted_to_fp16)[name = string("op_3173_cast_fp16")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_3173_cast_fp16)[name = string("variance_47_cast_fp16")]; tensor const_60_to_fp16 = const()[name = string("const_60_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84999424)))]; tensor var_3177_cast_fp16 = mul(x = const_60_to_fp16, y = hidden_states_93_cast_fp16)[name = string("op_3177_cast_fp16")]; fp16 var_3178_to_fp16 = const()[name = string("op_3178_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3179_cast_fp16 = add(x = variance_47_cast_fp16, y = var_3178_to_fp16)[name = string("op_3179_cast_fp16")]; fp32 var_3180_epsilon_0 = const()[name = string("op_3180_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3180_cast_fp16 = rsqrt(epsilon = var_3180_epsilon_0, x = var_3179_cast_fp16)[name = string("op_3180_cast_fp16")]; tensor input_55_cast_fp16 = mul(x = var_3177_cast_fp16, y = var_3180_cast_fp16)[name = string("input_55_cast_fp16")]; tensor layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85001536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147328))))[name = string("layers_5_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_3188_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_3188_cast_fp16")]; tensor layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88147904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91293696))))[name = string("layers_5_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_5_mlp_up_proj_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_59_cast_fp16 = mul(x = var_3188_cast_fp16, y = linear_40_cast_fp16)[name = string("input_59_cast_fp16")]; tensor layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91294272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440064))))[name = string("layers_5_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor var_3195_axes_0 = const()[name = string("op_3195_axes_0"), val = tensor([0])]; tensor var_3195_cast_fp16 = squeeze(axes = var_3195_axes_0, x = linear_41_cast_fp16)[name = string("op_3195_cast_fp16")]; tensor var_3197_axes_0 = const()[name = string("op_3197_axes_0"), val = tensor([0])]; tensor var_3197_cast_fp16 = squeeze(axes = var_3197_axes_0, x = var_3195_cast_fp16)[name = string("op_3197_cast_fp16")]; tensor var_3199_axes_0 = const()[name = string("op_3199_axes_0"), val = tensor([-1])]; tensor var_3199_cast_fp16 = expand_dims(axes = var_3199_axes_0, x = var_3197_cast_fp16)[name = string("op_3199_cast_fp16")]; tensor h_11_axes_0 = const()[name = string("h_11_axes_0"), val = tensor([-1])]; tensor h_11_cast_fp16 = expand_dims(axes = h_11_axes_0, x = var_3199_cast_fp16)[name = string("h_11_cast_fp16")]; tensor hidden_23_cast_fp16 = add(x = hidden_21_cast_fp16, y = h_11_cast_fp16)[name = string("hidden_23_cast_fp16")]; tensor var_3213_begin_0 = const()[name = string("op_3213_begin_0"), val = tensor([0, 6144, 0, 0])]; tensor var_3213_end_0 = const()[name = string("op_3213_end_0"), val = tensor([1, 7168, 1, 256])]; tensor var_3213_end_mask_0 = const()[name = string("op_3213_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3213_cast_fp16 = slice_by_index(begin = var_3213_begin_0, end = var_3213_end_0, end_mask = var_3213_end_mask_0, x = key_cache)[name = string("op_3213_cast_fp16")]; tensor var_3233_begin_0 = const()[name = string("op_3233_begin_0"), val = tensor([0, 6144, 0, 0])]; tensor var_3233_end_0 = const()[name = string("op_3233_end_0"), val = tensor([1, 7168, 1, 256])]; tensor var_3233_end_mask_0 = const()[name = string("op_3233_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3233_cast_fp16 = slice_by_index(begin = var_3233_begin_0, end = var_3233_end_0, end_mask = var_3233_end_mask_0, x = value_cache)[name = string("op_3233_cast_fp16")]; tensor var_3245_axes_0 = const()[name = string("op_3245_axes_0"), val = tensor([-1])]; tensor var_3245_cast_fp16 = squeeze(axes = var_3245_axes_0, x = hidden_23_cast_fp16)[name = string("op_3245_cast_fp16")]; tensor var_3247_axes_0 = const()[name = string("op_3247_axes_0"), val = tensor([-1])]; tensor var_3247_cast_fp16 = squeeze(axes = var_3247_axes_0, x = var_3245_cast_fp16)[name = string("op_3247_cast_fp16")]; tensor hidden_states_97_axes_0 = const()[name = string("hidden_states_97_axes_0"), val = tensor([0])]; tensor hidden_states_97_cast_fp16 = expand_dims(axes = hidden_states_97_axes_0, x = var_3247_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; fp16 var_3253_promoted_to_fp16 = const()[name = string("op_3253_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3259_cast_fp16 = pow(x = hidden_states_97_cast_fp16, y = var_3253_promoted_to_fp16)[name = string("op_3259_cast_fp16")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_3259_cast_fp16)[name = string("variance_49_cast_fp16")]; tensor const_61_to_fp16 = const()[name = string("const_61_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94440640)))]; tensor var_3263_cast_fp16 = mul(x = const_61_to_fp16, y = hidden_states_97_cast_fp16)[name = string("op_3263_cast_fp16")]; fp16 var_3264_to_fp16 = const()[name = string("op_3264_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3265_cast_fp16 = add(x = variance_49_cast_fp16, y = var_3264_to_fp16)[name = string("op_3265_cast_fp16")]; fp32 var_3266_epsilon_0 = const()[name = string("op_3266_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3266_cast_fp16 = rsqrt(epsilon = var_3266_epsilon_0, x = var_3265_cast_fp16)[name = string("op_3266_cast_fp16")]; tensor input_61_cast_fp16 = mul(x = var_3263_cast_fp16, y = var_3266_cast_fp16)[name = string("input_61_cast_fp16")]; tensor layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94442752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96539968))))[name = string("layers_6_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_3275 = const()[name = string("op_3275"), val = tensor([1, 1, 16, 128])]; tensor var_3276_cast_fp16 = reshape(shape = var_3275, x = linear_42_cast_fp16)[name = string("op_3276_cast_fp16")]; tensor layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96540544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589184))))[name = string("layers_6_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor var_3287 = const()[name = string("op_3287"), val = tensor([1, 1, 8, 128])]; tensor var_3288_cast_fp16 = reshape(shape = var_3287, x = linear_43_cast_fp16)[name = string("op_3288_cast_fp16")]; tensor layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97589760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638400))))[name = string("layers_6_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_44_cast_fp16")]; fp16 var_3307_promoted_to_fp16 = const()[name = string("op_3307_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3313_cast_fp16 = pow(x = var_3276_cast_fp16, y = var_3307_promoted_to_fp16)[name = string("op_3313_cast_fp16")]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; tensor const_306 = const()[name = string("const_306"), val = tensor([3])]; tensor variance_51_cast_fp16 = reduce_mean(axes = const_306, keep_dims = variance_51_keep_dims_0, x = var_3313_cast_fp16)[name = string("variance_51_cast_fp16")]; tensor const_307_to_fp16 = const()[name = string("const_307_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98638976)))]; tensor var_3317_cast_fp16 = mul(x = const_307_to_fp16, y = var_3276_cast_fp16)[name = string("op_3317_cast_fp16")]; fp16 var_3318_to_fp16 = const()[name = string("op_3318_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3319_cast_fp16 = add(x = variance_51_cast_fp16, y = var_3318_to_fp16)[name = string("op_3319_cast_fp16")]; fp32 var_3320_epsilon_0 = const()[name = string("op_3320_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3320_cast_fp16 = rsqrt(epsilon = var_3320_epsilon_0, x = var_3319_cast_fp16)[name = string("op_3320_cast_fp16")]; tensor q_37_cast_fp16 = mul(x = var_3317_cast_fp16, y = var_3320_cast_fp16)[name = string("q_37_cast_fp16")]; fp16 var_3325_promoted_to_fp16 = const()[name = string("op_3325_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3331_cast_fp16 = pow(x = var_3288_cast_fp16, y = var_3325_promoted_to_fp16)[name = string("op_3331_cast_fp16")]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; tensor const_308 = const()[name = string("const_308"), val = tensor([3])]; tensor variance_53_cast_fp16 = reduce_mean(axes = const_308, keep_dims = variance_53_keep_dims_0, x = var_3331_cast_fp16)[name = string("variance_53_cast_fp16")]; tensor const_309_to_fp16 = const()[name = string("const_309_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639296)))]; tensor var_3335_cast_fp16 = mul(x = const_309_to_fp16, y = var_3288_cast_fp16)[name = string("op_3335_cast_fp16")]; fp16 var_3336_to_fp16 = const()[name = string("op_3336_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3337_cast_fp16 = add(x = variance_53_cast_fp16, y = var_3336_to_fp16)[name = string("op_3337_cast_fp16")]; fp32 var_3338_epsilon_0 = const()[name = string("op_3338_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3338_cast_fp16 = rsqrt(epsilon = var_3338_epsilon_0, x = var_3337_cast_fp16)[name = string("op_3338_cast_fp16")]; tensor k_37_cast_fp16 = mul(x = var_3335_cast_fp16, y = var_3338_cast_fp16)[name = string("k_37_cast_fp16")]; tensor var_3353_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3353_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_37_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_37_cast_fp16)[name = string("x2_25_cast_fp16")]; fp16 const_66_promoted_to_fp16 = const()[name = string("const_66_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3374_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_66_promoted_to_fp16)[name = string("op_3374_cast_fp16")]; int32 var_3376 = const()[name = string("op_3376"), val = int32(-1)]; bool var_3377_interleave_0 = const()[name = string("op_3377_interleave_0"), val = bool(false)]; tensor var_3377_cast_fp16 = concat(axis = var_3376, interleave = var_3377_interleave_0, values = (var_3374_cast_fp16, x1_25_cast_fp16))[name = string("op_3377_cast_fp16")]; tensor var_3378_cast_fp16 = mul(x = var_3377_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3378_cast_fp16")]; tensor q_41_cast_fp16 = add(x = var_3353_cast_fp16, y = var_3378_cast_fp16)[name = string("q_41_cast_fp16")]; tensor var_3381_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3381_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_37_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_37_cast_fp16)[name = string("x2_27_cast_fp16")]; fp16 const_69_promoted_to_fp16 = const()[name = string("const_69_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3402_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_69_promoted_to_fp16)[name = string("op_3402_cast_fp16")]; int32 var_3404 = const()[name = string("op_3404"), val = int32(-1)]; bool var_3405_interleave_0 = const()[name = string("op_3405_interleave_0"), val = bool(false)]; tensor var_3405_cast_fp16 = concat(axis = var_3404, interleave = var_3405_interleave_0, values = (var_3402_cast_fp16, x1_27_cast_fp16))[name = string("op_3405_cast_fp16")]; tensor var_3406_cast_fp16 = mul(x = var_3405_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3406_cast_fp16")]; tensor k_41_cast_fp16 = add(x = var_3381_cast_fp16, y = var_3406_cast_fp16)[name = string("k_41_cast_fp16")]; tensor var_3413 = const()[name = string("op_3413"), val = tensor([1, 1024, 1, 1])]; tensor nk_13_cast_fp16 = reshape(shape = var_3413, x = k_41_cast_fp16)[name = string("nk_13_cast_fp16")]; tensor var_3419 = const()[name = string("op_3419"), val = tensor([1, 1024, 1, 1])]; tensor nv_13_cast_fp16 = reshape(shape = var_3419, x = linear_44_cast_fp16)[name = string("nv_13_cast_fp16")]; tensor var_3424_cast_fp16 = mul(x = var_3213_cast_fp16, y = var_1203_cast_fp16)[name = string("op_3424_cast_fp16")]; tensor var_3425_cast_fp16 = mul(x = nk_13_cast_fp16, y = update_mask_cast_fp16)[name = string("op_3425_cast_fp16")]; tensor lkc_27_cast_fp16 = add(x = var_3424_cast_fp16, y = var_3425_cast_fp16)[name = string("lkc_27_cast_fp16")]; tensor var_3431_cast_fp16 = mul(x = var_3233_cast_fp16, y = var_1203_cast_fp16)[name = string("op_3431_cast_fp16")]; tensor var_3432_cast_fp16 = mul(x = nv_13_cast_fp16, y = update_mask_cast_fp16)[name = string("op_3432_cast_fp16")]; tensor lvc_27_cast_fp16 = add(x = var_3431_cast_fp16, y = var_3432_cast_fp16)[name = string("lvc_27_cast_fp16")]; tensor var_3436_axes_0 = const()[name = string("op_3436_axes_0"), val = tensor([2])]; tensor var_3436_cast_fp16 = squeeze(axes = var_3436_axes_0, x = lkc_27_cast_fp16)[name = string("op_3436_cast_fp16")]; tensor var_3441 = const()[name = string("op_3441"), val = tensor([1, 8, 128, 256])]; tensor kc_25_cast_fp16 = reshape(shape = var_3441, x = var_3436_cast_fp16)[name = string("kc_25_cast_fp16")]; tensor var_3444_axes_0 = const()[name = string("op_3444_axes_0"), val = tensor([2])]; tensor var_3444_cast_fp16 = squeeze(axes = var_3444_axes_0, x = lvc_27_cast_fp16)[name = string("op_3444_cast_fp16")]; tensor var_3449 = const()[name = string("op_3449"), val = tensor([1, 8, 128, 256])]; tensor vc_25_cast_fp16 = reshape(shape = var_3449, x = var_3444_cast_fp16)[name = string("vc_25_cast_fp16")]; tensor var_3452_axes_0 = const()[name = string("op_3452_axes_0"), val = tensor([2])]; tensor var_3452_cast_fp16 = expand_dims(axes = var_3452_axes_0, x = kc_25_cast_fp16)[name = string("op_3452_cast_fp16")]; tensor var_3460_reps_0 = const()[name = string("op_3460_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3460_cast_fp16 = tile(reps = var_3460_reps_0, x = var_3452_cast_fp16)[name = string("op_3460_cast_fp16")]; tensor var_3465 = const()[name = string("op_3465"), val = tensor([1, 16, 128, 256])]; tensor kc_27_cast_fp16 = reshape(shape = var_3465, x = var_3460_cast_fp16)[name = string("kc_27_cast_fp16")]; tensor var_3468_axes_0 = const()[name = string("op_3468_axes_0"), val = tensor([2])]; tensor var_3468_cast_fp16 = expand_dims(axes = var_3468_axes_0, x = vc_25_cast_fp16)[name = string("op_3468_cast_fp16")]; tensor var_3476_reps_0 = const()[name = string("op_3476_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3476_cast_fp16 = tile(reps = var_3476_reps_0, x = var_3468_cast_fp16)[name = string("op_3476_cast_fp16")]; tensor var_3481 = const()[name = string("op_3481"), val = tensor([1, 16, 128, 256])]; tensor vc_27_cast_fp16 = reshape(shape = var_3481, x = var_3476_cast_fp16)[name = string("vc_27_cast_fp16")]; tensor var_3485_perm_0 = const()[name = string("op_3485_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_3486_transpose_x_0 = const()[name = string("op_3486_transpose_x_0"), val = bool(false)]; bool var_3486_transpose_y_0 = const()[name = string("op_3486_transpose_y_0"), val = bool(false)]; tensor var_3485_cast_fp16 = transpose(perm = var_3485_perm_0, x = q_41_cast_fp16)[name = string("transpose_43")]; tensor var_3486_cast_fp16 = matmul(transpose_x = var_3486_transpose_x_0, transpose_y = var_3486_transpose_y_0, x = var_3485_cast_fp16, y = kc_27_cast_fp16)[name = string("op_3486_cast_fp16")]; fp16 _inversed_aw_49_y_0_to_fp16 = const()[name = string("_inversed_aw_49_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_49_cast_fp16 = mul(x = var_3486_cast_fp16, y = _inversed_aw_49_y_0_to_fp16)[name = string("_inversed_aw_49_cast_fp16")]; tensor aw_51_cast_fp16 = add(x = _inversed_aw_49_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_51_cast_fp16")]; int32 var_3500 = const()[name = string("op_3500"), val = int32(-1)]; tensor aw_55_cast_fp16 = softmax(axis = var_3500, x = aw_51_cast_fp16)[name = string("aw_55_cast_fp16")]; bool var_3506_transpose_x_1 = const()[name = string("op_3506_transpose_x_1"), val = bool(false)]; bool var_3506_transpose_y_1 = const()[name = string("op_3506_transpose_y_1"), val = bool(true)]; tensor var_3506_cast_fp16 = matmul(transpose_x = var_3506_transpose_x_1, transpose_y = var_3506_transpose_y_1, x = aw_55_cast_fp16, y = vc_27_cast_fp16)[name = string("op_3506_cast_fp16")]; tensor var_3509_perm_0 = const()[name = string("op_3509_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3513 = const()[name = string("op_3513"), val = tensor([1, 1, -1])]; tensor var_3509_cast_fp16 = transpose(perm = var_3509_perm_0, x = var_3506_cast_fp16)[name = string("transpose_42")]; tensor input_63_cast_fp16 = reshape(shape = var_3513, x = var_3509_cast_fp16)[name = string("input_63_cast_fp16")]; tensor layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98639616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100736832))))[name = string("layers_6_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor var_3519_axes_0 = const()[name = string("op_3519_axes_0"), val = tensor([0])]; tensor var_3519_cast_fp16 = squeeze(axes = var_3519_axes_0, x = linear_45_cast_fp16)[name = string("op_3519_cast_fp16")]; tensor var_3521_axes_0 = const()[name = string("op_3521_axes_0"), val = tensor([0])]; tensor var_3521_cast_fp16 = squeeze(axes = var_3521_axes_0, x = var_3519_cast_fp16)[name = string("op_3521_cast_fp16")]; tensor var_3523_axes_0 = const()[name = string("op_3523_axes_0"), val = tensor([-1])]; tensor var_3523_cast_fp16 = expand_dims(axes = var_3523_axes_0, x = var_3521_cast_fp16)[name = string("op_3523_cast_fp16")]; tensor ao_13_axes_0 = const()[name = string("ao_13_axes_0"), val = tensor([-1])]; tensor ao_13_cast_fp16 = expand_dims(axes = ao_13_axes_0, x = var_3523_cast_fp16)[name = string("ao_13_cast_fp16")]; tensor hidden_25_cast_fp16 = add(x = hidden_23_cast_fp16, y = ao_13_cast_fp16)[name = string("hidden_25_cast_fp16")]; tensor var_3529_axes_0 = const()[name = string("op_3529_axes_0"), val = tensor([-1])]; tensor var_3529_cast_fp16 = squeeze(axes = var_3529_axes_0, x = hidden_25_cast_fp16)[name = string("op_3529_cast_fp16")]; tensor var_3531_axes_0 = const()[name = string("op_3531_axes_0"), val = tensor([-1])]; tensor var_3531_cast_fp16 = squeeze(axes = var_3531_axes_0, x = var_3529_cast_fp16)[name = string("op_3531_cast_fp16")]; tensor hidden_states_109_axes_0 = const()[name = string("hidden_states_109_axes_0"), val = tensor([0])]; tensor hidden_states_109_cast_fp16 = expand_dims(axes = hidden_states_109_axes_0, x = var_3531_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; fp16 var_3537_promoted_to_fp16 = const()[name = string("op_3537_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3543_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_3537_promoted_to_fp16)[name = string("op_3543_cast_fp16")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_3543_cast_fp16)[name = string("variance_55_cast_fp16")]; tensor const_70_to_fp16 = const()[name = string("const_70_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100737408)))]; tensor var_3547_cast_fp16 = mul(x = const_70_to_fp16, y = hidden_states_109_cast_fp16)[name = string("op_3547_cast_fp16")]; fp16 var_3548_to_fp16 = const()[name = string("op_3548_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3549_cast_fp16 = add(x = variance_55_cast_fp16, y = var_3548_to_fp16)[name = string("op_3549_cast_fp16")]; fp32 var_3550_epsilon_0 = const()[name = string("op_3550_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3550_cast_fp16 = rsqrt(epsilon = var_3550_epsilon_0, x = var_3549_cast_fp16)[name = string("op_3550_cast_fp16")]; tensor input_65_cast_fp16 = mul(x = var_3547_cast_fp16, y = var_3550_cast_fp16)[name = string("input_65_cast_fp16")]; tensor layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100739520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885312))))[name = string("layers_6_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_3558_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_3558_cast_fp16")]; tensor layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103885888))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107031680))))[name = string("layers_6_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_6_mlp_up_proj_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_69_cast_fp16 = mul(x = var_3558_cast_fp16, y = linear_47_cast_fp16)[name = string("input_69_cast_fp16")]; tensor layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107032256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178048))))[name = string("layers_6_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor var_3565_axes_0 = const()[name = string("op_3565_axes_0"), val = tensor([0])]; tensor var_3565_cast_fp16 = squeeze(axes = var_3565_axes_0, x = linear_48_cast_fp16)[name = string("op_3565_cast_fp16")]; tensor var_3567_axes_0 = const()[name = string("op_3567_axes_0"), val = tensor([0])]; tensor var_3567_cast_fp16 = squeeze(axes = var_3567_axes_0, x = var_3565_cast_fp16)[name = string("op_3567_cast_fp16")]; tensor var_3569_axes_0 = const()[name = string("op_3569_axes_0"), val = tensor([-1])]; tensor var_3569_cast_fp16 = expand_dims(axes = var_3569_axes_0, x = var_3567_cast_fp16)[name = string("op_3569_cast_fp16")]; tensor h_13_axes_0 = const()[name = string("h_13_axes_0"), val = tensor([-1])]; tensor h_13_cast_fp16 = expand_dims(axes = h_13_axes_0, x = var_3569_cast_fp16)[name = string("h_13_cast_fp16")]; tensor hidden_27_cast_fp16 = add(x = hidden_25_cast_fp16, y = h_13_cast_fp16)[name = string("hidden_27_cast_fp16")]; tensor var_3583_begin_0 = const()[name = string("op_3583_begin_0"), val = tensor([0, 7168, 0, 0])]; tensor var_3583_end_0 = const()[name = string("op_3583_end_0"), val = tensor([1, 8192, 1, 256])]; tensor var_3583_end_mask_0 = const()[name = string("op_3583_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3583_cast_fp16 = slice_by_index(begin = var_3583_begin_0, end = var_3583_end_0, end_mask = var_3583_end_mask_0, x = key_cache)[name = string("op_3583_cast_fp16")]; tensor var_3603_begin_0 = const()[name = string("op_3603_begin_0"), val = tensor([0, 7168, 0, 0])]; tensor var_3603_end_0 = const()[name = string("op_3603_end_0"), val = tensor([1, 8192, 1, 256])]; tensor var_3603_end_mask_0 = const()[name = string("op_3603_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3603_cast_fp16 = slice_by_index(begin = var_3603_begin_0, end = var_3603_end_0, end_mask = var_3603_end_mask_0, x = value_cache)[name = string("op_3603_cast_fp16")]; tensor var_3615_axes_0 = const()[name = string("op_3615_axes_0"), val = tensor([-1])]; tensor var_3615_cast_fp16 = squeeze(axes = var_3615_axes_0, x = hidden_27_cast_fp16)[name = string("op_3615_cast_fp16")]; tensor var_3617_axes_0 = const()[name = string("op_3617_axes_0"), val = tensor([-1])]; tensor var_3617_cast_fp16 = squeeze(axes = var_3617_axes_0, x = var_3615_cast_fp16)[name = string("op_3617_cast_fp16")]; tensor hidden_states_113_axes_0 = const()[name = string("hidden_states_113_axes_0"), val = tensor([0])]; tensor hidden_states_113_cast_fp16 = expand_dims(axes = hidden_states_113_axes_0, x = var_3617_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; fp16 var_3623_promoted_to_fp16 = const()[name = string("op_3623_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3629_cast_fp16 = pow(x = hidden_states_113_cast_fp16, y = var_3623_promoted_to_fp16)[name = string("op_3629_cast_fp16")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_3629_cast_fp16)[name = string("variance_57_cast_fp16")]; tensor const_71_to_fp16 = const()[name = string("const_71_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110178624)))]; tensor var_3633_cast_fp16 = mul(x = const_71_to_fp16, y = hidden_states_113_cast_fp16)[name = string("op_3633_cast_fp16")]; fp16 var_3634_to_fp16 = const()[name = string("op_3634_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3635_cast_fp16 = add(x = variance_57_cast_fp16, y = var_3634_to_fp16)[name = string("op_3635_cast_fp16")]; fp32 var_3636_epsilon_0 = const()[name = string("op_3636_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3636_cast_fp16 = rsqrt(epsilon = var_3636_epsilon_0, x = var_3635_cast_fp16)[name = string("op_3636_cast_fp16")]; tensor input_71_cast_fp16 = mul(x = var_3633_cast_fp16, y = var_3636_cast_fp16)[name = string("input_71_cast_fp16")]; tensor layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110180736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112277952))))[name = string("layers_7_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor var_3645 = const()[name = string("op_3645"), val = tensor([1, 1, 16, 128])]; tensor var_3646_cast_fp16 = reshape(shape = var_3645, x = linear_49_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112278528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327168))))[name = string("layers_7_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor var_3657 = const()[name = string("op_3657"), val = tensor([1, 1, 8, 128])]; tensor var_3658_cast_fp16 = reshape(shape = var_3657, x = linear_50_cast_fp16)[name = string("op_3658_cast_fp16")]; tensor layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113327744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376384))))[name = string("layers_7_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("linear_51_cast_fp16")]; fp16 var_3677_promoted_to_fp16 = const()[name = string("op_3677_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3683_cast_fp16 = pow(x = var_3646_cast_fp16, y = var_3677_promoted_to_fp16)[name = string("op_3683_cast_fp16")]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; tensor const_310 = const()[name = string("const_310"), val = tensor([3])]; tensor variance_59_cast_fp16 = reduce_mean(axes = const_310, keep_dims = variance_59_keep_dims_0, x = var_3683_cast_fp16)[name = string("variance_59_cast_fp16")]; tensor const_311_to_fp16 = const()[name = string("const_311_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114376960)))]; tensor var_3687_cast_fp16 = mul(x = const_311_to_fp16, y = var_3646_cast_fp16)[name = string("op_3687_cast_fp16")]; fp16 var_3688_to_fp16 = const()[name = string("op_3688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3689_cast_fp16 = add(x = variance_59_cast_fp16, y = var_3688_to_fp16)[name = string("op_3689_cast_fp16")]; fp32 var_3690_epsilon_0 = const()[name = string("op_3690_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3690_cast_fp16 = rsqrt(epsilon = var_3690_epsilon_0, x = var_3689_cast_fp16)[name = string("op_3690_cast_fp16")]; tensor q_43_cast_fp16 = mul(x = var_3687_cast_fp16, y = var_3690_cast_fp16)[name = string("q_43_cast_fp16")]; fp16 var_3695_promoted_to_fp16 = const()[name = string("op_3695_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3701_cast_fp16 = pow(x = var_3658_cast_fp16, y = var_3695_promoted_to_fp16)[name = string("op_3701_cast_fp16")]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; tensor const_312 = const()[name = string("const_312"), val = tensor([3])]; tensor variance_61_cast_fp16 = reduce_mean(axes = const_312, keep_dims = variance_61_keep_dims_0, x = var_3701_cast_fp16)[name = string("variance_61_cast_fp16")]; tensor const_313_to_fp16 = const()[name = string("const_313_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377280)))]; tensor var_3705_cast_fp16 = mul(x = const_313_to_fp16, y = var_3658_cast_fp16)[name = string("op_3705_cast_fp16")]; fp16 var_3706_to_fp16 = const()[name = string("op_3706_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3707_cast_fp16 = add(x = variance_61_cast_fp16, y = var_3706_to_fp16)[name = string("op_3707_cast_fp16")]; fp32 var_3708_epsilon_0 = const()[name = string("op_3708_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3708_cast_fp16 = rsqrt(epsilon = var_3708_epsilon_0, x = var_3707_cast_fp16)[name = string("op_3708_cast_fp16")]; tensor k_43_cast_fp16 = mul(x = var_3705_cast_fp16, y = var_3708_cast_fp16)[name = string("k_43_cast_fp16")]; tensor var_3723_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3723_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_43_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_43_cast_fp16)[name = string("x2_29_cast_fp16")]; fp16 const_76_promoted_to_fp16 = const()[name = string("const_76_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3744_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_76_promoted_to_fp16)[name = string("op_3744_cast_fp16")]; int32 var_3746 = const()[name = string("op_3746"), val = int32(-1)]; bool var_3747_interleave_0 = const()[name = string("op_3747_interleave_0"), val = bool(false)]; tensor var_3747_cast_fp16 = concat(axis = var_3746, interleave = var_3747_interleave_0, values = (var_3744_cast_fp16, x1_29_cast_fp16))[name = string("op_3747_cast_fp16")]; tensor var_3748_cast_fp16 = mul(x = var_3747_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3748_cast_fp16")]; tensor q_47_cast_fp16 = add(x = var_3723_cast_fp16, y = var_3748_cast_fp16)[name = string("q_47_cast_fp16")]; tensor var_3751_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_1_cast_fp16)[name = string("op_3751_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_43_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_43_cast_fp16)[name = string("x2_31_cast_fp16")]; fp16 const_79_promoted_to_fp16 = const()[name = string("const_79_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3772_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_79_promoted_to_fp16)[name = string("op_3772_cast_fp16")]; int32 var_3774 = const()[name = string("op_3774"), val = int32(-1)]; bool var_3775_interleave_0 = const()[name = string("op_3775_interleave_0"), val = bool(false)]; tensor var_3775_cast_fp16 = concat(axis = var_3774, interleave = var_3775_interleave_0, values = (var_3772_cast_fp16, x1_31_cast_fp16))[name = string("op_3775_cast_fp16")]; tensor var_3776_cast_fp16 = mul(x = var_3775_cast_fp16, y = sin_1_cast_fp16)[name = string("op_3776_cast_fp16")]; tensor k_47_cast_fp16 = add(x = var_3751_cast_fp16, y = var_3776_cast_fp16)[name = string("k_47_cast_fp16")]; tensor var_3783 = const()[name = string("op_3783"), val = tensor([1, 1024, 1, 1])]; tensor nk_15_cast_fp16 = reshape(shape = var_3783, x = k_47_cast_fp16)[name = string("nk_15_cast_fp16")]; tensor var_3789 = const()[name = string("op_3789"), val = tensor([1, 1024, 1, 1])]; tensor nv_15_cast_fp16 = reshape(shape = var_3789, x = linear_51_cast_fp16)[name = string("nv_15_cast_fp16")]; tensor var_3794_cast_fp16 = mul(x = var_3583_cast_fp16, y = var_1203_cast_fp16)[name = string("op_3794_cast_fp16")]; tensor var_3795_cast_fp16 = mul(x = nk_15_cast_fp16, y = update_mask_cast_fp16)[name = string("op_3795_cast_fp16")]; tensor lkc_31_cast_fp16 = add(x = var_3794_cast_fp16, y = var_3795_cast_fp16)[name = string("lkc_31_cast_fp16")]; tensor var_3801_cast_fp16 = mul(x = var_3603_cast_fp16, y = var_1203_cast_fp16)[name = string("op_3801_cast_fp16")]; tensor var_3802_cast_fp16 = mul(x = nv_15_cast_fp16, y = update_mask_cast_fp16)[name = string("op_3802_cast_fp16")]; tensor lvc_31_cast_fp16 = add(x = var_3801_cast_fp16, y = var_3802_cast_fp16)[name = string("lvc_31_cast_fp16")]; tensor var_3806_axes_0 = const()[name = string("op_3806_axes_0"), val = tensor([2])]; tensor var_3806_cast_fp16 = squeeze(axes = var_3806_axes_0, x = lkc_31_cast_fp16)[name = string("op_3806_cast_fp16")]; tensor var_3811 = const()[name = string("op_3811"), val = tensor([1, 8, 128, 256])]; tensor kc_29_cast_fp16 = reshape(shape = var_3811, x = var_3806_cast_fp16)[name = string("kc_29_cast_fp16")]; tensor var_3814_axes_0 = const()[name = string("op_3814_axes_0"), val = tensor([2])]; tensor var_3814_cast_fp16 = squeeze(axes = var_3814_axes_0, x = lvc_31_cast_fp16)[name = string("op_3814_cast_fp16")]; tensor var_3819 = const()[name = string("op_3819"), val = tensor([1, 8, 128, 256])]; tensor vc_29_cast_fp16 = reshape(shape = var_3819, x = var_3814_cast_fp16)[name = string("vc_29_cast_fp16")]; tensor var_3822_axes_0 = const()[name = string("op_3822_axes_0"), val = tensor([2])]; tensor var_3822_cast_fp16 = expand_dims(axes = var_3822_axes_0, x = kc_29_cast_fp16)[name = string("op_3822_cast_fp16")]; tensor var_3830_reps_0 = const()[name = string("op_3830_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3830_cast_fp16 = tile(reps = var_3830_reps_0, x = var_3822_cast_fp16)[name = string("op_3830_cast_fp16")]; tensor var_3835 = const()[name = string("op_3835"), val = tensor([1, 16, 128, 256])]; tensor kc_31_cast_fp16 = reshape(shape = var_3835, x = var_3830_cast_fp16)[name = string("kc_31_cast_fp16")]; tensor var_3838_axes_0 = const()[name = string("op_3838_axes_0"), val = tensor([2])]; tensor var_3838_cast_fp16 = expand_dims(axes = var_3838_axes_0, x = vc_29_cast_fp16)[name = string("op_3838_cast_fp16")]; tensor var_3846_reps_0 = const()[name = string("op_3846_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_3846_cast_fp16 = tile(reps = var_3846_reps_0, x = var_3838_cast_fp16)[name = string("op_3846_cast_fp16")]; tensor var_3851 = const()[name = string("op_3851"), val = tensor([1, 16, 128, 256])]; tensor vc_31_cast_fp16 = reshape(shape = var_3851, x = var_3846_cast_fp16)[name = string("vc_31_cast_fp16")]; tensor var_3855_perm_0 = const()[name = string("op_3855_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_3856_transpose_x_0 = const()[name = string("op_3856_transpose_x_0"), val = bool(false)]; bool var_3856_transpose_y_0 = const()[name = string("op_3856_transpose_y_0"), val = bool(false)]; tensor var_3855_cast_fp16 = transpose(perm = var_3855_perm_0, x = q_47_cast_fp16)[name = string("transpose_41")]; tensor var_3856_cast_fp16 = matmul(transpose_x = var_3856_transpose_x_0, transpose_y = var_3856_transpose_y_0, x = var_3855_cast_fp16, y = kc_31_cast_fp16)[name = string("op_3856_cast_fp16")]; fp16 _inversed_aw_57_y_0_to_fp16 = const()[name = string("_inversed_aw_57_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_57_cast_fp16 = mul(x = var_3856_cast_fp16, y = _inversed_aw_57_y_0_to_fp16)[name = string("_inversed_aw_57_cast_fp16")]; tensor aw_59_cast_fp16 = add(x = _inversed_aw_57_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_59_cast_fp16")]; int32 var_3870 = const()[name = string("op_3870"), val = int32(-1)]; tensor aw_63_cast_fp16 = softmax(axis = var_3870, x = aw_59_cast_fp16)[name = string("aw_63_cast_fp16")]; bool var_3876_transpose_x_1 = const()[name = string("op_3876_transpose_x_1"), val = bool(false)]; bool var_3876_transpose_y_1 = const()[name = string("op_3876_transpose_y_1"), val = bool(true)]; tensor var_3876_cast_fp16 = matmul(transpose_x = var_3876_transpose_x_1, transpose_y = var_3876_transpose_y_1, x = aw_63_cast_fp16, y = vc_31_cast_fp16)[name = string("op_3876_cast_fp16")]; tensor var_3879_perm_0 = const()[name = string("op_3879_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3883 = const()[name = string("op_3883"), val = tensor([1, 1, -1])]; tensor var_3879_cast_fp16 = transpose(perm = var_3879_perm_0, x = var_3876_cast_fp16)[name = string("transpose_40")]; tensor input_73_cast_fp16 = reshape(shape = var_3883, x = var_3879_cast_fp16)[name = string("input_73_cast_fp16")]; tensor layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114377600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116474816))))[name = string("layers_7_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor var_3889_axes_0 = const()[name = string("op_3889_axes_0"), val = tensor([0])]; tensor var_3889_cast_fp16 = squeeze(axes = var_3889_axes_0, x = linear_52_cast_fp16)[name = string("op_3889_cast_fp16")]; tensor var_3891_axes_0 = const()[name = string("op_3891_axes_0"), val = tensor([0])]; tensor var_3891_cast_fp16 = squeeze(axes = var_3891_axes_0, x = var_3889_cast_fp16)[name = string("op_3891_cast_fp16")]; tensor var_3893_axes_0 = const()[name = string("op_3893_axes_0"), val = tensor([-1])]; tensor var_3893_cast_fp16 = expand_dims(axes = var_3893_axes_0, x = var_3891_cast_fp16)[name = string("op_3893_cast_fp16")]; tensor ao_15_axes_0 = const()[name = string("ao_15_axes_0"), val = tensor([-1])]; tensor ao_15_cast_fp16 = expand_dims(axes = ao_15_axes_0, x = var_3893_cast_fp16)[name = string("ao_15_cast_fp16")]; tensor hidden_29_cast_fp16 = add(x = hidden_27_cast_fp16, y = ao_15_cast_fp16)[name = string("hidden_29_cast_fp16")]; tensor var_3899_axes_0 = const()[name = string("op_3899_axes_0"), val = tensor([-1])]; tensor var_3899_cast_fp16 = squeeze(axes = var_3899_axes_0, x = hidden_29_cast_fp16)[name = string("op_3899_cast_fp16")]; tensor var_3901_axes_0 = const()[name = string("op_3901_axes_0"), val = tensor([-1])]; tensor var_3901_cast_fp16 = squeeze(axes = var_3901_axes_0, x = var_3899_cast_fp16)[name = string("op_3901_cast_fp16")]; tensor hidden_states_125_axes_0 = const()[name = string("hidden_states_125_axes_0"), val = tensor([0])]; tensor hidden_states_125_cast_fp16 = expand_dims(axes = hidden_states_125_axes_0, x = var_3901_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; fp16 var_3907_promoted_to_fp16 = const()[name = string("op_3907_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3913_cast_fp16 = pow(x = hidden_states_125_cast_fp16, y = var_3907_promoted_to_fp16)[name = string("op_3913_cast_fp16")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_3913_cast_fp16)[name = string("variance_63_cast_fp16")]; tensor const_80_to_fp16 = const()[name = string("const_80_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116475392)))]; tensor var_3917_cast_fp16 = mul(x = const_80_to_fp16, y = hidden_states_125_cast_fp16)[name = string("op_3917_cast_fp16")]; fp16 var_3918_to_fp16 = const()[name = string("op_3918_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3919_cast_fp16 = add(x = variance_63_cast_fp16, y = var_3918_to_fp16)[name = string("op_3919_cast_fp16")]; fp32 var_3920_epsilon_0 = const()[name = string("op_3920_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3920_cast_fp16 = rsqrt(epsilon = var_3920_epsilon_0, x = var_3919_cast_fp16)[name = string("op_3920_cast_fp16")]; tensor input_75_cast_fp16 = mul(x = var_3917_cast_fp16, y = var_3920_cast_fp16)[name = string("input_75_cast_fp16")]; tensor layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116477504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623296))))[name = string("layers_7_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_3928_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_3928_cast_fp16")]; tensor layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119623872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122769664))))[name = string("layers_7_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_7_mlp_up_proj_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_79_cast_fp16 = mul(x = var_3928_cast_fp16, y = linear_54_cast_fp16)[name = string("input_79_cast_fp16")]; tensor layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122770240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916032))))[name = string("layers_7_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor var_3935_axes_0 = const()[name = string("op_3935_axes_0"), val = tensor([0])]; tensor var_3935_cast_fp16 = squeeze(axes = var_3935_axes_0, x = linear_55_cast_fp16)[name = string("op_3935_cast_fp16")]; tensor var_3937_axes_0 = const()[name = string("op_3937_axes_0"), val = tensor([0])]; tensor var_3937_cast_fp16 = squeeze(axes = var_3937_axes_0, x = var_3935_cast_fp16)[name = string("op_3937_cast_fp16")]; tensor var_3939_axes_0 = const()[name = string("op_3939_axes_0"), val = tensor([-1])]; tensor var_3939_cast_fp16 = expand_dims(axes = var_3939_axes_0, x = var_3937_cast_fp16)[name = string("op_3939_cast_fp16")]; tensor h_15_axes_0 = const()[name = string("h_15_axes_0"), val = tensor([-1])]; tensor h_15_cast_fp16 = expand_dims(axes = h_15_axes_0, x = var_3939_cast_fp16)[name = string("h_15_cast_fp16")]; tensor hidden_31_cast_fp16 = add(x = hidden_29_cast_fp16, y = h_15_cast_fp16)[name = string("hidden_31_cast_fp16")]; tensor var_3953_begin_0 = const()[name = string("op_3953_begin_0"), val = tensor([0, 8192, 0, 0])]; tensor var_3953_end_0 = const()[name = string("op_3953_end_0"), val = tensor([1, 9216, 1, 256])]; tensor var_3953_end_mask_0 = const()[name = string("op_3953_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3953_cast_fp16 = slice_by_index(begin = var_3953_begin_0, end = var_3953_end_0, end_mask = var_3953_end_mask_0, x = key_cache)[name = string("op_3953_cast_fp16")]; tensor var_3973_begin_0 = const()[name = string("op_3973_begin_0"), val = tensor([0, 8192, 0, 0])]; tensor var_3973_end_0 = const()[name = string("op_3973_end_0"), val = tensor([1, 9216, 1, 256])]; tensor var_3973_end_mask_0 = const()[name = string("op_3973_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3973_cast_fp16 = slice_by_index(begin = var_3973_begin_0, end = var_3973_end_0, end_mask = var_3973_end_mask_0, x = value_cache)[name = string("op_3973_cast_fp16")]; tensor var_3985_axes_0 = const()[name = string("op_3985_axes_0"), val = tensor([-1])]; tensor var_3985_cast_fp16 = squeeze(axes = var_3985_axes_0, x = hidden_31_cast_fp16)[name = string("op_3985_cast_fp16")]; tensor var_3987_axes_0 = const()[name = string("op_3987_axes_0"), val = tensor([-1])]; tensor var_3987_cast_fp16 = squeeze(axes = var_3987_axes_0, x = var_3985_cast_fp16)[name = string("op_3987_cast_fp16")]; tensor hidden_states_129_axes_0 = const()[name = string("hidden_states_129_axes_0"), val = tensor([0])]; tensor hidden_states_129_cast_fp16 = expand_dims(axes = hidden_states_129_axes_0, x = var_3987_cast_fp16)[name = string("hidden_states_129_cast_fp16")]; fp16 var_3993_promoted_to_fp16 = const()[name = string("op_3993_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_3999_cast_fp16 = pow(x = hidden_states_129_cast_fp16, y = var_3993_promoted_to_fp16)[name = string("op_3999_cast_fp16")]; tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_3999_cast_fp16)[name = string("variance_65_cast_fp16")]; tensor const_81_to_fp16 = const()[name = string("const_81_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125916608)))]; tensor var_4003_cast_fp16 = mul(x = const_81_to_fp16, y = hidden_states_129_cast_fp16)[name = string("op_4003_cast_fp16")]; fp16 var_4004_to_fp16 = const()[name = string("op_4004_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4005_cast_fp16 = add(x = variance_65_cast_fp16, y = var_4004_to_fp16)[name = string("op_4005_cast_fp16")]; fp32 var_4006_epsilon_0 = const()[name = string("op_4006_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4006_cast_fp16 = rsqrt(epsilon = var_4006_epsilon_0, x = var_4005_cast_fp16)[name = string("op_4006_cast_fp16")]; tensor input_81_cast_fp16 = mul(x = var_4003_cast_fp16, y = var_4006_cast_fp16)[name = string("input_81_cast_fp16")]; tensor layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125918720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128015936))))[name = string("layers_8_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor var_4015 = const()[name = string("op_4015"), val = tensor([1, 1, 16, 128])]; tensor var_4016_cast_fp16 = reshape(shape = var_4015, x = linear_56_cast_fp16)[name = string("op_4016_cast_fp16")]; tensor layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128016512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065152))))[name = string("layers_8_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor var_4027 = const()[name = string("op_4027"), val = tensor([1, 1, 8, 128])]; tensor var_4028_cast_fp16 = reshape(shape = var_4027, x = linear_57_cast_fp16)[name = string("op_4028_cast_fp16")]; tensor layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129065728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114368))))[name = string("layers_8_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("linear_58_cast_fp16")]; fp16 var_4047_promoted_to_fp16 = const()[name = string("op_4047_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4053_cast_fp16 = pow(x = var_4016_cast_fp16, y = var_4047_promoted_to_fp16)[name = string("op_4053_cast_fp16")]; bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; tensor const_314 = const()[name = string("const_314"), val = tensor([3])]; tensor variance_67_cast_fp16 = reduce_mean(axes = const_314, keep_dims = variance_67_keep_dims_0, x = var_4053_cast_fp16)[name = string("variance_67_cast_fp16")]; tensor const_315_to_fp16 = const()[name = string("const_315_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130114944)))]; tensor var_4057_cast_fp16 = mul(x = const_315_to_fp16, y = var_4016_cast_fp16)[name = string("op_4057_cast_fp16")]; fp16 var_4058_to_fp16 = const()[name = string("op_4058_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4059_cast_fp16 = add(x = variance_67_cast_fp16, y = var_4058_to_fp16)[name = string("op_4059_cast_fp16")]; fp32 var_4060_epsilon_0 = const()[name = string("op_4060_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4060_cast_fp16 = rsqrt(epsilon = var_4060_epsilon_0, x = var_4059_cast_fp16)[name = string("op_4060_cast_fp16")]; tensor q_49_cast_fp16 = mul(x = var_4057_cast_fp16, y = var_4060_cast_fp16)[name = string("q_49_cast_fp16")]; fp16 var_4065_promoted_to_fp16 = const()[name = string("op_4065_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4071_cast_fp16 = pow(x = var_4028_cast_fp16, y = var_4065_promoted_to_fp16)[name = string("op_4071_cast_fp16")]; bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; tensor const_316 = const()[name = string("const_316"), val = tensor([3])]; tensor variance_69_cast_fp16 = reduce_mean(axes = const_316, keep_dims = variance_69_keep_dims_0, x = var_4071_cast_fp16)[name = string("variance_69_cast_fp16")]; tensor const_317_to_fp16 = const()[name = string("const_317_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115264)))]; tensor var_4075_cast_fp16 = mul(x = const_317_to_fp16, y = var_4028_cast_fp16)[name = string("op_4075_cast_fp16")]; fp16 var_4076_to_fp16 = const()[name = string("op_4076_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4077_cast_fp16 = add(x = variance_69_cast_fp16, y = var_4076_to_fp16)[name = string("op_4077_cast_fp16")]; fp32 var_4078_epsilon_0 = const()[name = string("op_4078_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4078_cast_fp16 = rsqrt(epsilon = var_4078_epsilon_0, x = var_4077_cast_fp16)[name = string("op_4078_cast_fp16")]; tensor k_49_cast_fp16 = mul(x = var_4075_cast_fp16, y = var_4078_cast_fp16)[name = string("k_49_cast_fp16")]; tensor var_4093_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4093_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_49_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_49_cast_fp16)[name = string("x2_33_cast_fp16")]; fp16 const_86_promoted_to_fp16 = const()[name = string("const_86_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4114_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_86_promoted_to_fp16)[name = string("op_4114_cast_fp16")]; int32 var_4116 = const()[name = string("op_4116"), val = int32(-1)]; bool var_4117_interleave_0 = const()[name = string("op_4117_interleave_0"), val = bool(false)]; tensor var_4117_cast_fp16 = concat(axis = var_4116, interleave = var_4117_interleave_0, values = (var_4114_cast_fp16, x1_33_cast_fp16))[name = string("op_4117_cast_fp16")]; tensor var_4118_cast_fp16 = mul(x = var_4117_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4118_cast_fp16")]; tensor q_53_cast_fp16 = add(x = var_4093_cast_fp16, y = var_4118_cast_fp16)[name = string("q_53_cast_fp16")]; tensor var_4121_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4121_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_49_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_49_cast_fp16)[name = string("x2_35_cast_fp16")]; fp16 const_89_promoted_to_fp16 = const()[name = string("const_89_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4142_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_89_promoted_to_fp16)[name = string("op_4142_cast_fp16")]; int32 var_4144 = const()[name = string("op_4144"), val = int32(-1)]; bool var_4145_interleave_0 = const()[name = string("op_4145_interleave_0"), val = bool(false)]; tensor var_4145_cast_fp16 = concat(axis = var_4144, interleave = var_4145_interleave_0, values = (var_4142_cast_fp16, x1_35_cast_fp16))[name = string("op_4145_cast_fp16")]; tensor var_4146_cast_fp16 = mul(x = var_4145_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4146_cast_fp16")]; tensor k_53_cast_fp16 = add(x = var_4121_cast_fp16, y = var_4146_cast_fp16)[name = string("k_53_cast_fp16")]; tensor var_4153 = const()[name = string("op_4153"), val = tensor([1, 1024, 1, 1])]; tensor nk_17_cast_fp16 = reshape(shape = var_4153, x = k_53_cast_fp16)[name = string("nk_17_cast_fp16")]; tensor var_4159 = const()[name = string("op_4159"), val = tensor([1, 1024, 1, 1])]; tensor nv_17_cast_fp16 = reshape(shape = var_4159, x = linear_58_cast_fp16)[name = string("nv_17_cast_fp16")]; tensor var_4164_cast_fp16 = mul(x = var_3953_cast_fp16, y = var_1203_cast_fp16)[name = string("op_4164_cast_fp16")]; tensor var_4165_cast_fp16 = mul(x = nk_17_cast_fp16, y = update_mask_cast_fp16)[name = string("op_4165_cast_fp16")]; tensor lkc_35_cast_fp16 = add(x = var_4164_cast_fp16, y = var_4165_cast_fp16)[name = string("lkc_35_cast_fp16")]; tensor var_4171_cast_fp16 = mul(x = var_3973_cast_fp16, y = var_1203_cast_fp16)[name = string("op_4171_cast_fp16")]; tensor var_4172_cast_fp16 = mul(x = nv_17_cast_fp16, y = update_mask_cast_fp16)[name = string("op_4172_cast_fp16")]; tensor lvc_35_cast_fp16 = add(x = var_4171_cast_fp16, y = var_4172_cast_fp16)[name = string("lvc_35_cast_fp16")]; tensor var_4176_axes_0 = const()[name = string("op_4176_axes_0"), val = tensor([2])]; tensor var_4176_cast_fp16 = squeeze(axes = var_4176_axes_0, x = lkc_35_cast_fp16)[name = string("op_4176_cast_fp16")]; tensor var_4181 = const()[name = string("op_4181"), val = tensor([1, 8, 128, 256])]; tensor kc_33_cast_fp16 = reshape(shape = var_4181, x = var_4176_cast_fp16)[name = string("kc_33_cast_fp16")]; tensor var_4184_axes_0 = const()[name = string("op_4184_axes_0"), val = tensor([2])]; tensor var_4184_cast_fp16 = squeeze(axes = var_4184_axes_0, x = lvc_35_cast_fp16)[name = string("op_4184_cast_fp16")]; tensor var_4189 = const()[name = string("op_4189"), val = tensor([1, 8, 128, 256])]; tensor vc_33_cast_fp16 = reshape(shape = var_4189, x = var_4184_cast_fp16)[name = string("vc_33_cast_fp16")]; tensor var_4192_axes_0 = const()[name = string("op_4192_axes_0"), val = tensor([2])]; tensor var_4192_cast_fp16 = expand_dims(axes = var_4192_axes_0, x = kc_33_cast_fp16)[name = string("op_4192_cast_fp16")]; tensor var_4200_reps_0 = const()[name = string("op_4200_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4200_cast_fp16 = tile(reps = var_4200_reps_0, x = var_4192_cast_fp16)[name = string("op_4200_cast_fp16")]; tensor var_4205 = const()[name = string("op_4205"), val = tensor([1, 16, 128, 256])]; tensor kc_35_cast_fp16 = reshape(shape = var_4205, x = var_4200_cast_fp16)[name = string("kc_35_cast_fp16")]; tensor var_4208_axes_0 = const()[name = string("op_4208_axes_0"), val = tensor([2])]; tensor var_4208_cast_fp16 = expand_dims(axes = var_4208_axes_0, x = vc_33_cast_fp16)[name = string("op_4208_cast_fp16")]; tensor var_4216_reps_0 = const()[name = string("op_4216_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4216_cast_fp16 = tile(reps = var_4216_reps_0, x = var_4208_cast_fp16)[name = string("op_4216_cast_fp16")]; tensor var_4221 = const()[name = string("op_4221"), val = tensor([1, 16, 128, 256])]; tensor vc_35_cast_fp16 = reshape(shape = var_4221, x = var_4216_cast_fp16)[name = string("vc_35_cast_fp16")]; tensor var_4225_perm_0 = const()[name = string("op_4225_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_4226_transpose_x_0 = const()[name = string("op_4226_transpose_x_0"), val = bool(false)]; bool var_4226_transpose_y_0 = const()[name = string("op_4226_transpose_y_0"), val = bool(false)]; tensor var_4225_cast_fp16 = transpose(perm = var_4225_perm_0, x = q_53_cast_fp16)[name = string("transpose_39")]; tensor var_4226_cast_fp16 = matmul(transpose_x = var_4226_transpose_x_0, transpose_y = var_4226_transpose_y_0, x = var_4225_cast_fp16, y = kc_35_cast_fp16)[name = string("op_4226_cast_fp16")]; fp16 _inversed_aw_65_y_0_to_fp16 = const()[name = string("_inversed_aw_65_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_65_cast_fp16 = mul(x = var_4226_cast_fp16, y = _inversed_aw_65_y_0_to_fp16)[name = string("_inversed_aw_65_cast_fp16")]; tensor aw_67_cast_fp16 = add(x = _inversed_aw_65_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_67_cast_fp16")]; int32 var_4240 = const()[name = string("op_4240"), val = int32(-1)]; tensor aw_71_cast_fp16 = softmax(axis = var_4240, x = aw_67_cast_fp16)[name = string("aw_71_cast_fp16")]; bool var_4246_transpose_x_1 = const()[name = string("op_4246_transpose_x_1"), val = bool(false)]; bool var_4246_transpose_y_1 = const()[name = string("op_4246_transpose_y_1"), val = bool(true)]; tensor var_4246_cast_fp16 = matmul(transpose_x = var_4246_transpose_x_1, transpose_y = var_4246_transpose_y_1, x = aw_71_cast_fp16, y = vc_35_cast_fp16)[name = string("op_4246_cast_fp16")]; tensor var_4249_perm_0 = const()[name = string("op_4249_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4253 = const()[name = string("op_4253"), val = tensor([1, 1, -1])]; tensor var_4249_cast_fp16 = transpose(perm = var_4249_perm_0, x = var_4246_cast_fp16)[name = string("transpose_38")]; tensor input_83_cast_fp16 = reshape(shape = var_4253, x = var_4249_cast_fp16)[name = string("input_83_cast_fp16")]; tensor layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130115584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132212800))))[name = string("layers_8_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor var_4259_axes_0 = const()[name = string("op_4259_axes_0"), val = tensor([0])]; tensor var_4259_cast_fp16 = squeeze(axes = var_4259_axes_0, x = linear_59_cast_fp16)[name = string("op_4259_cast_fp16")]; tensor var_4261_axes_0 = const()[name = string("op_4261_axes_0"), val = tensor([0])]; tensor var_4261_cast_fp16 = squeeze(axes = var_4261_axes_0, x = var_4259_cast_fp16)[name = string("op_4261_cast_fp16")]; tensor var_4263_axes_0 = const()[name = string("op_4263_axes_0"), val = tensor([-1])]; tensor var_4263_cast_fp16 = expand_dims(axes = var_4263_axes_0, x = var_4261_cast_fp16)[name = string("op_4263_cast_fp16")]; tensor ao_17_axes_0 = const()[name = string("ao_17_axes_0"), val = tensor([-1])]; tensor ao_17_cast_fp16 = expand_dims(axes = ao_17_axes_0, x = var_4263_cast_fp16)[name = string("ao_17_cast_fp16")]; tensor hidden_33_cast_fp16 = add(x = hidden_31_cast_fp16, y = ao_17_cast_fp16)[name = string("hidden_33_cast_fp16")]; tensor var_4269_axes_0 = const()[name = string("op_4269_axes_0"), val = tensor([-1])]; tensor var_4269_cast_fp16 = squeeze(axes = var_4269_axes_0, x = hidden_33_cast_fp16)[name = string("op_4269_cast_fp16")]; tensor var_4271_axes_0 = const()[name = string("op_4271_axes_0"), val = tensor([-1])]; tensor var_4271_cast_fp16 = squeeze(axes = var_4271_axes_0, x = var_4269_cast_fp16)[name = string("op_4271_cast_fp16")]; tensor hidden_states_141_axes_0 = const()[name = string("hidden_states_141_axes_0"), val = tensor([0])]; tensor hidden_states_141_cast_fp16 = expand_dims(axes = hidden_states_141_axes_0, x = var_4271_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; fp16 var_4277_promoted_to_fp16 = const()[name = string("op_4277_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4283_cast_fp16 = pow(x = hidden_states_141_cast_fp16, y = var_4277_promoted_to_fp16)[name = string("op_4283_cast_fp16")]; tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_4283_cast_fp16)[name = string("variance_71_cast_fp16")]; tensor const_90_to_fp16 = const()[name = string("const_90_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132213376)))]; tensor var_4287_cast_fp16 = mul(x = const_90_to_fp16, y = hidden_states_141_cast_fp16)[name = string("op_4287_cast_fp16")]; fp16 var_4288_to_fp16 = const()[name = string("op_4288_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4289_cast_fp16 = add(x = variance_71_cast_fp16, y = var_4288_to_fp16)[name = string("op_4289_cast_fp16")]; fp32 var_4290_epsilon_0 = const()[name = string("op_4290_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4290_cast_fp16 = rsqrt(epsilon = var_4290_epsilon_0, x = var_4289_cast_fp16)[name = string("op_4290_cast_fp16")]; tensor input_85_cast_fp16 = mul(x = var_4287_cast_fp16, y = var_4290_cast_fp16)[name = string("input_85_cast_fp16")]; tensor layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132215488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361280))))[name = string("layers_8_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_4298_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_4298_cast_fp16")]; tensor layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135361856))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138507648))))[name = string("layers_8_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_8_mlp_up_proj_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_89_cast_fp16 = mul(x = var_4298_cast_fp16, y = linear_61_cast_fp16)[name = string("input_89_cast_fp16")]; tensor layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138508224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654016))))[name = string("layers_8_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor var_4305_axes_0 = const()[name = string("op_4305_axes_0"), val = tensor([0])]; tensor var_4305_cast_fp16 = squeeze(axes = var_4305_axes_0, x = linear_62_cast_fp16)[name = string("op_4305_cast_fp16")]; tensor var_4307_axes_0 = const()[name = string("op_4307_axes_0"), val = tensor([0])]; tensor var_4307_cast_fp16 = squeeze(axes = var_4307_axes_0, x = var_4305_cast_fp16)[name = string("op_4307_cast_fp16")]; tensor var_4309_axes_0 = const()[name = string("op_4309_axes_0"), val = tensor([-1])]; tensor var_4309_cast_fp16 = expand_dims(axes = var_4309_axes_0, x = var_4307_cast_fp16)[name = string("op_4309_cast_fp16")]; tensor h_17_axes_0 = const()[name = string("h_17_axes_0"), val = tensor([-1])]; tensor h_17_cast_fp16 = expand_dims(axes = h_17_axes_0, x = var_4309_cast_fp16)[name = string("h_17_cast_fp16")]; tensor hidden_35_cast_fp16 = add(x = hidden_33_cast_fp16, y = h_17_cast_fp16)[name = string("hidden_35_cast_fp16")]; tensor var_4323_begin_0 = const()[name = string("op_4323_begin_0"), val = tensor([0, 9216, 0, 0])]; tensor var_4323_end_0 = const()[name = string("op_4323_end_0"), val = tensor([1, 10240, 1, 256])]; tensor var_4323_end_mask_0 = const()[name = string("op_4323_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4323_cast_fp16 = slice_by_index(begin = var_4323_begin_0, end = var_4323_end_0, end_mask = var_4323_end_mask_0, x = key_cache)[name = string("op_4323_cast_fp16")]; tensor var_4343_begin_0 = const()[name = string("op_4343_begin_0"), val = tensor([0, 9216, 0, 0])]; tensor var_4343_end_0 = const()[name = string("op_4343_end_0"), val = tensor([1, 10240, 1, 256])]; tensor var_4343_end_mask_0 = const()[name = string("op_4343_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4343_cast_fp16 = slice_by_index(begin = var_4343_begin_0, end = var_4343_end_0, end_mask = var_4343_end_mask_0, x = value_cache)[name = string("op_4343_cast_fp16")]; tensor var_4355_axes_0 = const()[name = string("op_4355_axes_0"), val = tensor([-1])]; tensor var_4355_cast_fp16 = squeeze(axes = var_4355_axes_0, x = hidden_35_cast_fp16)[name = string("op_4355_cast_fp16")]; tensor var_4357_axes_0 = const()[name = string("op_4357_axes_0"), val = tensor([-1])]; tensor var_4357_cast_fp16 = squeeze(axes = var_4357_axes_0, x = var_4355_cast_fp16)[name = string("op_4357_cast_fp16")]; tensor hidden_states_145_axes_0 = const()[name = string("hidden_states_145_axes_0"), val = tensor([0])]; tensor hidden_states_145_cast_fp16 = expand_dims(axes = hidden_states_145_axes_0, x = var_4357_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; fp16 var_4363_promoted_to_fp16 = const()[name = string("op_4363_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4369_cast_fp16 = pow(x = hidden_states_145_cast_fp16, y = var_4363_promoted_to_fp16)[name = string("op_4369_cast_fp16")]; tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_4369_cast_fp16)[name = string("variance_73_cast_fp16")]; tensor const_91_to_fp16 = const()[name = string("const_91_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141654592)))]; tensor var_4373_cast_fp16 = mul(x = const_91_to_fp16, y = hidden_states_145_cast_fp16)[name = string("op_4373_cast_fp16")]; fp16 var_4374_to_fp16 = const()[name = string("op_4374_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4375_cast_fp16 = add(x = variance_73_cast_fp16, y = var_4374_to_fp16)[name = string("op_4375_cast_fp16")]; fp32 var_4376_epsilon_0 = const()[name = string("op_4376_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4376_cast_fp16 = rsqrt(epsilon = var_4376_epsilon_0, x = var_4375_cast_fp16)[name = string("op_4376_cast_fp16")]; tensor input_91_cast_fp16 = mul(x = var_4373_cast_fp16, y = var_4376_cast_fp16)[name = string("input_91_cast_fp16")]; tensor layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141656704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143753920))))[name = string("layers_9_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor var_4385 = const()[name = string("op_4385"), val = tensor([1, 1, 16, 128])]; tensor var_4386_cast_fp16 = reshape(shape = var_4385, x = linear_63_cast_fp16)[name = string("op_4386_cast_fp16")]; tensor layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143754496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803136))))[name = string("layers_9_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor var_4397 = const()[name = string("op_4397"), val = tensor([1, 1, 8, 128])]; tensor var_4398_cast_fp16 = reshape(shape = var_4397, x = linear_64_cast_fp16)[name = string("op_4398_cast_fp16")]; tensor layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144803712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852352))))[name = string("layers_9_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("linear_65_cast_fp16")]; fp16 var_4417_promoted_to_fp16 = const()[name = string("op_4417_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4423_cast_fp16 = pow(x = var_4386_cast_fp16, y = var_4417_promoted_to_fp16)[name = string("op_4423_cast_fp16")]; bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; tensor const_318 = const()[name = string("const_318"), val = tensor([3])]; tensor variance_75_cast_fp16 = reduce_mean(axes = const_318, keep_dims = variance_75_keep_dims_0, x = var_4423_cast_fp16)[name = string("variance_75_cast_fp16")]; tensor const_319_to_fp16 = const()[name = string("const_319_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145852928)))]; tensor var_4427_cast_fp16 = mul(x = const_319_to_fp16, y = var_4386_cast_fp16)[name = string("op_4427_cast_fp16")]; fp16 var_4428_to_fp16 = const()[name = string("op_4428_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4429_cast_fp16 = add(x = variance_75_cast_fp16, y = var_4428_to_fp16)[name = string("op_4429_cast_fp16")]; fp32 var_4430_epsilon_0 = const()[name = string("op_4430_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4430_cast_fp16 = rsqrt(epsilon = var_4430_epsilon_0, x = var_4429_cast_fp16)[name = string("op_4430_cast_fp16")]; tensor q_55_cast_fp16 = mul(x = var_4427_cast_fp16, y = var_4430_cast_fp16)[name = string("q_55_cast_fp16")]; fp16 var_4435_promoted_to_fp16 = const()[name = string("op_4435_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4441_cast_fp16 = pow(x = var_4398_cast_fp16, y = var_4435_promoted_to_fp16)[name = string("op_4441_cast_fp16")]; bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; tensor const_320 = const()[name = string("const_320"), val = tensor([3])]; tensor variance_77_cast_fp16 = reduce_mean(axes = const_320, keep_dims = variance_77_keep_dims_0, x = var_4441_cast_fp16)[name = string("variance_77_cast_fp16")]; tensor const_321_to_fp16 = const()[name = string("const_321_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853248)))]; tensor var_4445_cast_fp16 = mul(x = const_321_to_fp16, y = var_4398_cast_fp16)[name = string("op_4445_cast_fp16")]; fp16 var_4446_to_fp16 = const()[name = string("op_4446_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4447_cast_fp16 = add(x = variance_77_cast_fp16, y = var_4446_to_fp16)[name = string("op_4447_cast_fp16")]; fp32 var_4448_epsilon_0 = const()[name = string("op_4448_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4448_cast_fp16 = rsqrt(epsilon = var_4448_epsilon_0, x = var_4447_cast_fp16)[name = string("op_4448_cast_fp16")]; tensor k_55_cast_fp16 = mul(x = var_4445_cast_fp16, y = var_4448_cast_fp16)[name = string("k_55_cast_fp16")]; tensor var_4463_cast_fp16 = mul(x = q_55_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4463_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_55_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_55_cast_fp16)[name = string("x2_37_cast_fp16")]; fp16 const_96_promoted_to_fp16 = const()[name = string("const_96_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4484_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_96_promoted_to_fp16)[name = string("op_4484_cast_fp16")]; int32 var_4486 = const()[name = string("op_4486"), val = int32(-1)]; bool var_4487_interleave_0 = const()[name = string("op_4487_interleave_0"), val = bool(false)]; tensor var_4487_cast_fp16 = concat(axis = var_4486, interleave = var_4487_interleave_0, values = (var_4484_cast_fp16, x1_37_cast_fp16))[name = string("op_4487_cast_fp16")]; tensor var_4488_cast_fp16 = mul(x = var_4487_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4488_cast_fp16")]; tensor q_59_cast_fp16 = add(x = var_4463_cast_fp16, y = var_4488_cast_fp16)[name = string("q_59_cast_fp16")]; tensor var_4491_cast_fp16 = mul(x = k_55_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4491_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_55_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_55_cast_fp16)[name = string("x2_39_cast_fp16")]; fp16 const_99_promoted_to_fp16 = const()[name = string("const_99_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4512_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_99_promoted_to_fp16)[name = string("op_4512_cast_fp16")]; int32 var_4514 = const()[name = string("op_4514"), val = int32(-1)]; bool var_4515_interleave_0 = const()[name = string("op_4515_interleave_0"), val = bool(false)]; tensor var_4515_cast_fp16 = concat(axis = var_4514, interleave = var_4515_interleave_0, values = (var_4512_cast_fp16, x1_39_cast_fp16))[name = string("op_4515_cast_fp16")]; tensor var_4516_cast_fp16 = mul(x = var_4515_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4516_cast_fp16")]; tensor k_59_cast_fp16 = add(x = var_4491_cast_fp16, y = var_4516_cast_fp16)[name = string("k_59_cast_fp16")]; tensor var_4523 = const()[name = string("op_4523"), val = tensor([1, 1024, 1, 1])]; tensor nk_19_cast_fp16 = reshape(shape = var_4523, x = k_59_cast_fp16)[name = string("nk_19_cast_fp16")]; tensor var_4529 = const()[name = string("op_4529"), val = tensor([1, 1024, 1, 1])]; tensor nv_19_cast_fp16 = reshape(shape = var_4529, x = linear_65_cast_fp16)[name = string("nv_19_cast_fp16")]; tensor var_4534_cast_fp16 = mul(x = var_4323_cast_fp16, y = var_1203_cast_fp16)[name = string("op_4534_cast_fp16")]; tensor var_4535_cast_fp16 = mul(x = nk_19_cast_fp16, y = update_mask_cast_fp16)[name = string("op_4535_cast_fp16")]; tensor lkc_39_cast_fp16 = add(x = var_4534_cast_fp16, y = var_4535_cast_fp16)[name = string("lkc_39_cast_fp16")]; tensor var_4541_cast_fp16 = mul(x = var_4343_cast_fp16, y = var_1203_cast_fp16)[name = string("op_4541_cast_fp16")]; tensor var_4542_cast_fp16 = mul(x = nv_19_cast_fp16, y = update_mask_cast_fp16)[name = string("op_4542_cast_fp16")]; tensor lvc_39_cast_fp16 = add(x = var_4541_cast_fp16, y = var_4542_cast_fp16)[name = string("lvc_39_cast_fp16")]; tensor var_4546_axes_0 = const()[name = string("op_4546_axes_0"), val = tensor([2])]; tensor var_4546_cast_fp16 = squeeze(axes = var_4546_axes_0, x = lkc_39_cast_fp16)[name = string("op_4546_cast_fp16")]; tensor var_4551 = const()[name = string("op_4551"), val = tensor([1, 8, 128, 256])]; tensor kc_37_cast_fp16 = reshape(shape = var_4551, x = var_4546_cast_fp16)[name = string("kc_37_cast_fp16")]; tensor var_4554_axes_0 = const()[name = string("op_4554_axes_0"), val = tensor([2])]; tensor var_4554_cast_fp16 = squeeze(axes = var_4554_axes_0, x = lvc_39_cast_fp16)[name = string("op_4554_cast_fp16")]; tensor var_4559 = const()[name = string("op_4559"), val = tensor([1, 8, 128, 256])]; tensor vc_37_cast_fp16 = reshape(shape = var_4559, x = var_4554_cast_fp16)[name = string("vc_37_cast_fp16")]; tensor var_4562_axes_0 = const()[name = string("op_4562_axes_0"), val = tensor([2])]; tensor var_4562_cast_fp16 = expand_dims(axes = var_4562_axes_0, x = kc_37_cast_fp16)[name = string("op_4562_cast_fp16")]; tensor var_4570_reps_0 = const()[name = string("op_4570_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4570_cast_fp16 = tile(reps = var_4570_reps_0, x = var_4562_cast_fp16)[name = string("op_4570_cast_fp16")]; tensor var_4575 = const()[name = string("op_4575"), val = tensor([1, 16, 128, 256])]; tensor kc_39_cast_fp16 = reshape(shape = var_4575, x = var_4570_cast_fp16)[name = string("kc_39_cast_fp16")]; tensor var_4578_axes_0 = const()[name = string("op_4578_axes_0"), val = tensor([2])]; tensor var_4578_cast_fp16 = expand_dims(axes = var_4578_axes_0, x = vc_37_cast_fp16)[name = string("op_4578_cast_fp16")]; tensor var_4586_reps_0 = const()[name = string("op_4586_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4586_cast_fp16 = tile(reps = var_4586_reps_0, x = var_4578_cast_fp16)[name = string("op_4586_cast_fp16")]; tensor var_4591 = const()[name = string("op_4591"), val = tensor([1, 16, 128, 256])]; tensor vc_39_cast_fp16 = reshape(shape = var_4591, x = var_4586_cast_fp16)[name = string("vc_39_cast_fp16")]; tensor var_4595_perm_0 = const()[name = string("op_4595_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_4596_transpose_x_0 = const()[name = string("op_4596_transpose_x_0"), val = bool(false)]; bool var_4596_transpose_y_0 = const()[name = string("op_4596_transpose_y_0"), val = bool(false)]; tensor var_4595_cast_fp16 = transpose(perm = var_4595_perm_0, x = q_59_cast_fp16)[name = string("transpose_37")]; tensor var_4596_cast_fp16 = matmul(transpose_x = var_4596_transpose_x_0, transpose_y = var_4596_transpose_y_0, x = var_4595_cast_fp16, y = kc_39_cast_fp16)[name = string("op_4596_cast_fp16")]; fp16 _inversed_aw_73_y_0_to_fp16 = const()[name = string("_inversed_aw_73_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_73_cast_fp16 = mul(x = var_4596_cast_fp16, y = _inversed_aw_73_y_0_to_fp16)[name = string("_inversed_aw_73_cast_fp16")]; tensor aw_75_cast_fp16 = add(x = _inversed_aw_73_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_75_cast_fp16")]; int32 var_4610 = const()[name = string("op_4610"), val = int32(-1)]; tensor aw_79_cast_fp16 = softmax(axis = var_4610, x = aw_75_cast_fp16)[name = string("aw_79_cast_fp16")]; bool var_4616_transpose_x_1 = const()[name = string("op_4616_transpose_x_1"), val = bool(false)]; bool var_4616_transpose_y_1 = const()[name = string("op_4616_transpose_y_1"), val = bool(true)]; tensor var_4616_cast_fp16 = matmul(transpose_x = var_4616_transpose_x_1, transpose_y = var_4616_transpose_y_1, x = aw_79_cast_fp16, y = vc_39_cast_fp16)[name = string("op_4616_cast_fp16")]; tensor var_4619_perm_0 = const()[name = string("op_4619_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4623 = const()[name = string("op_4623"), val = tensor([1, 1, -1])]; tensor var_4619_cast_fp16 = transpose(perm = var_4619_perm_0, x = var_4616_cast_fp16)[name = string("transpose_36")]; tensor input_93_cast_fp16 = reshape(shape = var_4623, x = var_4619_cast_fp16)[name = string("input_93_cast_fp16")]; tensor layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145853568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147950784))))[name = string("layers_9_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor var_4629_axes_0 = const()[name = string("op_4629_axes_0"), val = tensor([0])]; tensor var_4629_cast_fp16 = squeeze(axes = var_4629_axes_0, x = linear_66_cast_fp16)[name = string("op_4629_cast_fp16")]; tensor var_4631_axes_0 = const()[name = string("op_4631_axes_0"), val = tensor([0])]; tensor var_4631_cast_fp16 = squeeze(axes = var_4631_axes_0, x = var_4629_cast_fp16)[name = string("op_4631_cast_fp16")]; tensor var_4633_axes_0 = const()[name = string("op_4633_axes_0"), val = tensor([-1])]; tensor var_4633_cast_fp16 = expand_dims(axes = var_4633_axes_0, x = var_4631_cast_fp16)[name = string("op_4633_cast_fp16")]; tensor ao_19_axes_0 = const()[name = string("ao_19_axes_0"), val = tensor([-1])]; tensor ao_19_cast_fp16 = expand_dims(axes = ao_19_axes_0, x = var_4633_cast_fp16)[name = string("ao_19_cast_fp16")]; tensor hidden_37_cast_fp16 = add(x = hidden_35_cast_fp16, y = ao_19_cast_fp16)[name = string("hidden_37_cast_fp16")]; tensor var_4639_axes_0 = const()[name = string("op_4639_axes_0"), val = tensor([-1])]; tensor var_4639_cast_fp16 = squeeze(axes = var_4639_axes_0, x = hidden_37_cast_fp16)[name = string("op_4639_cast_fp16")]; tensor var_4641_axes_0 = const()[name = string("op_4641_axes_0"), val = tensor([-1])]; tensor var_4641_cast_fp16 = squeeze(axes = var_4641_axes_0, x = var_4639_cast_fp16)[name = string("op_4641_cast_fp16")]; tensor hidden_states_157_axes_0 = const()[name = string("hidden_states_157_axes_0"), val = tensor([0])]; tensor hidden_states_157_cast_fp16 = expand_dims(axes = hidden_states_157_axes_0, x = var_4641_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; fp16 var_4647_promoted_to_fp16 = const()[name = string("op_4647_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4653_cast_fp16 = pow(x = hidden_states_157_cast_fp16, y = var_4647_promoted_to_fp16)[name = string("op_4653_cast_fp16")]; tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_4653_cast_fp16)[name = string("variance_79_cast_fp16")]; tensor const_100_to_fp16 = const()[name = string("const_100_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147951360)))]; tensor var_4657_cast_fp16 = mul(x = const_100_to_fp16, y = hidden_states_157_cast_fp16)[name = string("op_4657_cast_fp16")]; fp16 var_4658_to_fp16 = const()[name = string("op_4658_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4659_cast_fp16 = add(x = variance_79_cast_fp16, y = var_4658_to_fp16)[name = string("op_4659_cast_fp16")]; fp32 var_4660_epsilon_0 = const()[name = string("op_4660_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4660_cast_fp16 = rsqrt(epsilon = var_4660_epsilon_0, x = var_4659_cast_fp16)[name = string("op_4660_cast_fp16")]; tensor input_95_cast_fp16 = mul(x = var_4657_cast_fp16, y = var_4660_cast_fp16)[name = string("input_95_cast_fp16")]; tensor layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147953472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099264))))[name = string("layers_9_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_4668_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_4668_cast_fp16")]; tensor layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151099840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154245632))))[name = string("layers_9_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_9_mlp_up_proj_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_99_cast_fp16 = mul(x = var_4668_cast_fp16, y = linear_68_cast_fp16)[name = string("input_99_cast_fp16")]; tensor layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154246208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392000))))[name = string("layers_9_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor var_4675_axes_0 = const()[name = string("op_4675_axes_0"), val = tensor([0])]; tensor var_4675_cast_fp16 = squeeze(axes = var_4675_axes_0, x = linear_69_cast_fp16)[name = string("op_4675_cast_fp16")]; tensor var_4677_axes_0 = const()[name = string("op_4677_axes_0"), val = tensor([0])]; tensor var_4677_cast_fp16 = squeeze(axes = var_4677_axes_0, x = var_4675_cast_fp16)[name = string("op_4677_cast_fp16")]; tensor var_4679_axes_0 = const()[name = string("op_4679_axes_0"), val = tensor([-1])]; tensor var_4679_cast_fp16 = expand_dims(axes = var_4679_axes_0, x = var_4677_cast_fp16)[name = string("op_4679_cast_fp16")]; tensor h_19_axes_0 = const()[name = string("h_19_axes_0"), val = tensor([-1])]; tensor h_19_cast_fp16 = expand_dims(axes = h_19_axes_0, x = var_4679_cast_fp16)[name = string("h_19_cast_fp16")]; tensor hidden_39_cast_fp16 = add(x = hidden_37_cast_fp16, y = h_19_cast_fp16)[name = string("hidden_39_cast_fp16")]; tensor var_4693_begin_0 = const()[name = string("op_4693_begin_0"), val = tensor([0, 10240, 0, 0])]; tensor var_4693_end_0 = const()[name = string("op_4693_end_0"), val = tensor([1, 11264, 1, 256])]; tensor var_4693_end_mask_0 = const()[name = string("op_4693_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4693_cast_fp16 = slice_by_index(begin = var_4693_begin_0, end = var_4693_end_0, end_mask = var_4693_end_mask_0, x = key_cache)[name = string("op_4693_cast_fp16")]; tensor var_4713_begin_0 = const()[name = string("op_4713_begin_0"), val = tensor([0, 10240, 0, 0])]; tensor var_4713_end_0 = const()[name = string("op_4713_end_0"), val = tensor([1, 11264, 1, 256])]; tensor var_4713_end_mask_0 = const()[name = string("op_4713_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4713_cast_fp16 = slice_by_index(begin = var_4713_begin_0, end = var_4713_end_0, end_mask = var_4713_end_mask_0, x = value_cache)[name = string("op_4713_cast_fp16")]; tensor var_4725_axes_0 = const()[name = string("op_4725_axes_0"), val = tensor([-1])]; tensor var_4725_cast_fp16 = squeeze(axes = var_4725_axes_0, x = hidden_39_cast_fp16)[name = string("op_4725_cast_fp16")]; tensor var_4727_axes_0 = const()[name = string("op_4727_axes_0"), val = tensor([-1])]; tensor var_4727_cast_fp16 = squeeze(axes = var_4727_axes_0, x = var_4725_cast_fp16)[name = string("op_4727_cast_fp16")]; tensor hidden_states_161_axes_0 = const()[name = string("hidden_states_161_axes_0"), val = tensor([0])]; tensor hidden_states_161_cast_fp16 = expand_dims(axes = hidden_states_161_axes_0, x = var_4727_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; fp16 var_4733_promoted_to_fp16 = const()[name = string("op_4733_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4739_cast_fp16 = pow(x = hidden_states_161_cast_fp16, y = var_4733_promoted_to_fp16)[name = string("op_4739_cast_fp16")]; tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_4739_cast_fp16)[name = string("variance_81_cast_fp16")]; tensor const_101_to_fp16 = const()[name = string("const_101_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157392576)))]; tensor var_4743_cast_fp16 = mul(x = const_101_to_fp16, y = hidden_states_161_cast_fp16)[name = string("op_4743_cast_fp16")]; fp16 var_4744_to_fp16 = const()[name = string("op_4744_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4745_cast_fp16 = add(x = variance_81_cast_fp16, y = var_4744_to_fp16)[name = string("op_4745_cast_fp16")]; fp32 var_4746_epsilon_0 = const()[name = string("op_4746_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4746_cast_fp16 = rsqrt(epsilon = var_4746_epsilon_0, x = var_4745_cast_fp16)[name = string("op_4746_cast_fp16")]; tensor input_101_cast_fp16 = mul(x = var_4743_cast_fp16, y = var_4746_cast_fp16)[name = string("input_101_cast_fp16")]; tensor layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157394688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159491904))))[name = string("layers_10_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor var_4755 = const()[name = string("op_4755"), val = tensor([1, 1, 16, 128])]; tensor var_4756_cast_fp16 = reshape(shape = var_4755, x = linear_70_cast_fp16)[name = string("op_4756_cast_fp16")]; tensor layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159492480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541120))))[name = string("layers_10_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor var_4767 = const()[name = string("op_4767"), val = tensor([1, 1, 8, 128])]; tensor var_4768_cast_fp16 = reshape(shape = var_4767, x = linear_71_cast_fp16)[name = string("op_4768_cast_fp16")]; tensor layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160541696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590336))))[name = string("layers_10_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("linear_72_cast_fp16")]; fp16 var_4787_promoted_to_fp16 = const()[name = string("op_4787_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4793_cast_fp16 = pow(x = var_4756_cast_fp16, y = var_4787_promoted_to_fp16)[name = string("op_4793_cast_fp16")]; bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; tensor const_322 = const()[name = string("const_322"), val = tensor([3])]; tensor variance_83_cast_fp16 = reduce_mean(axes = const_322, keep_dims = variance_83_keep_dims_0, x = var_4793_cast_fp16)[name = string("variance_83_cast_fp16")]; tensor const_323_to_fp16 = const()[name = string("const_323_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161590912)))]; tensor var_4797_cast_fp16 = mul(x = const_323_to_fp16, y = var_4756_cast_fp16)[name = string("op_4797_cast_fp16")]; fp16 var_4798_to_fp16 = const()[name = string("op_4798_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4799_cast_fp16 = add(x = variance_83_cast_fp16, y = var_4798_to_fp16)[name = string("op_4799_cast_fp16")]; fp32 var_4800_epsilon_0 = const()[name = string("op_4800_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4800_cast_fp16 = rsqrt(epsilon = var_4800_epsilon_0, x = var_4799_cast_fp16)[name = string("op_4800_cast_fp16")]; tensor q_61_cast_fp16 = mul(x = var_4797_cast_fp16, y = var_4800_cast_fp16)[name = string("q_61_cast_fp16")]; fp16 var_4805_promoted_to_fp16 = const()[name = string("op_4805_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_4811_cast_fp16 = pow(x = var_4768_cast_fp16, y = var_4805_promoted_to_fp16)[name = string("op_4811_cast_fp16")]; bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; tensor const_324 = const()[name = string("const_324"), val = tensor([3])]; tensor variance_85_cast_fp16 = reduce_mean(axes = const_324, keep_dims = variance_85_keep_dims_0, x = var_4811_cast_fp16)[name = string("variance_85_cast_fp16")]; tensor const_325_to_fp16 = const()[name = string("const_325_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591232)))]; tensor var_4815_cast_fp16 = mul(x = const_325_to_fp16, y = var_4768_cast_fp16)[name = string("op_4815_cast_fp16")]; fp16 var_4816_to_fp16 = const()[name = string("op_4816_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4817_cast_fp16 = add(x = variance_85_cast_fp16, y = var_4816_to_fp16)[name = string("op_4817_cast_fp16")]; fp32 var_4818_epsilon_0 = const()[name = string("op_4818_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4818_cast_fp16 = rsqrt(epsilon = var_4818_epsilon_0, x = var_4817_cast_fp16)[name = string("op_4818_cast_fp16")]; tensor k_61_cast_fp16 = mul(x = var_4815_cast_fp16, y = var_4818_cast_fp16)[name = string("k_61_cast_fp16")]; tensor var_4833_cast_fp16 = mul(x = q_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4833_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_61_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_61_cast_fp16)[name = string("x2_41_cast_fp16")]; fp16 const_106_promoted_to_fp16 = const()[name = string("const_106_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4854_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_106_promoted_to_fp16)[name = string("op_4854_cast_fp16")]; int32 var_4856 = const()[name = string("op_4856"), val = int32(-1)]; bool var_4857_interleave_0 = const()[name = string("op_4857_interleave_0"), val = bool(false)]; tensor var_4857_cast_fp16 = concat(axis = var_4856, interleave = var_4857_interleave_0, values = (var_4854_cast_fp16, x1_41_cast_fp16))[name = string("op_4857_cast_fp16")]; tensor var_4858_cast_fp16 = mul(x = var_4857_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4858_cast_fp16")]; tensor q_65_cast_fp16 = add(x = var_4833_cast_fp16, y = var_4858_cast_fp16)[name = string("q_65_cast_fp16")]; tensor var_4861_cast_fp16 = mul(x = k_61_cast_fp16, y = cos_1_cast_fp16)[name = string("op_4861_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_61_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_61_cast_fp16)[name = string("x2_43_cast_fp16")]; fp16 const_109_promoted_to_fp16 = const()[name = string("const_109_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4882_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_109_promoted_to_fp16)[name = string("op_4882_cast_fp16")]; int32 var_4884 = const()[name = string("op_4884"), val = int32(-1)]; bool var_4885_interleave_0 = const()[name = string("op_4885_interleave_0"), val = bool(false)]; tensor var_4885_cast_fp16 = concat(axis = var_4884, interleave = var_4885_interleave_0, values = (var_4882_cast_fp16, x1_43_cast_fp16))[name = string("op_4885_cast_fp16")]; tensor var_4886_cast_fp16 = mul(x = var_4885_cast_fp16, y = sin_1_cast_fp16)[name = string("op_4886_cast_fp16")]; tensor k_65_cast_fp16 = add(x = var_4861_cast_fp16, y = var_4886_cast_fp16)[name = string("k_65_cast_fp16")]; tensor var_4893 = const()[name = string("op_4893"), val = tensor([1, 1024, 1, 1])]; tensor nk_21_cast_fp16 = reshape(shape = var_4893, x = k_65_cast_fp16)[name = string("nk_21_cast_fp16")]; tensor var_4899 = const()[name = string("op_4899"), val = tensor([1, 1024, 1, 1])]; tensor nv_21_cast_fp16 = reshape(shape = var_4899, x = linear_72_cast_fp16)[name = string("nv_21_cast_fp16")]; tensor var_4904_cast_fp16 = mul(x = var_4693_cast_fp16, y = var_1203_cast_fp16)[name = string("op_4904_cast_fp16")]; tensor var_4905_cast_fp16 = mul(x = nk_21_cast_fp16, y = update_mask_cast_fp16)[name = string("op_4905_cast_fp16")]; tensor lkc_43_cast_fp16 = add(x = var_4904_cast_fp16, y = var_4905_cast_fp16)[name = string("lkc_43_cast_fp16")]; tensor var_4911_cast_fp16 = mul(x = var_4713_cast_fp16, y = var_1203_cast_fp16)[name = string("op_4911_cast_fp16")]; tensor var_4912_cast_fp16 = mul(x = nv_21_cast_fp16, y = update_mask_cast_fp16)[name = string("op_4912_cast_fp16")]; tensor lvc_43_cast_fp16 = add(x = var_4911_cast_fp16, y = var_4912_cast_fp16)[name = string("lvc_43_cast_fp16")]; tensor var_4916_axes_0 = const()[name = string("op_4916_axes_0"), val = tensor([2])]; tensor var_4916_cast_fp16 = squeeze(axes = var_4916_axes_0, x = lkc_43_cast_fp16)[name = string("op_4916_cast_fp16")]; tensor var_4921 = const()[name = string("op_4921"), val = tensor([1, 8, 128, 256])]; tensor kc_41_cast_fp16 = reshape(shape = var_4921, x = var_4916_cast_fp16)[name = string("kc_41_cast_fp16")]; tensor var_4924_axes_0 = const()[name = string("op_4924_axes_0"), val = tensor([2])]; tensor var_4924_cast_fp16 = squeeze(axes = var_4924_axes_0, x = lvc_43_cast_fp16)[name = string("op_4924_cast_fp16")]; tensor var_4929 = const()[name = string("op_4929"), val = tensor([1, 8, 128, 256])]; tensor vc_41_cast_fp16 = reshape(shape = var_4929, x = var_4924_cast_fp16)[name = string("vc_41_cast_fp16")]; tensor var_4932_axes_0 = const()[name = string("op_4932_axes_0"), val = tensor([2])]; tensor var_4932_cast_fp16 = expand_dims(axes = var_4932_axes_0, x = kc_41_cast_fp16)[name = string("op_4932_cast_fp16")]; tensor var_4940_reps_0 = const()[name = string("op_4940_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4940_cast_fp16 = tile(reps = var_4940_reps_0, x = var_4932_cast_fp16)[name = string("op_4940_cast_fp16")]; tensor var_4945 = const()[name = string("op_4945"), val = tensor([1, 16, 128, 256])]; tensor kc_43_cast_fp16 = reshape(shape = var_4945, x = var_4940_cast_fp16)[name = string("kc_43_cast_fp16")]; tensor var_4948_axes_0 = const()[name = string("op_4948_axes_0"), val = tensor([2])]; tensor var_4948_cast_fp16 = expand_dims(axes = var_4948_axes_0, x = vc_41_cast_fp16)[name = string("op_4948_cast_fp16")]; tensor var_4956_reps_0 = const()[name = string("op_4956_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_4956_cast_fp16 = tile(reps = var_4956_reps_0, x = var_4948_cast_fp16)[name = string("op_4956_cast_fp16")]; tensor var_4961 = const()[name = string("op_4961"), val = tensor([1, 16, 128, 256])]; tensor vc_43_cast_fp16 = reshape(shape = var_4961, x = var_4956_cast_fp16)[name = string("vc_43_cast_fp16")]; tensor var_4965_perm_0 = const()[name = string("op_4965_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_4966_transpose_x_0 = const()[name = string("op_4966_transpose_x_0"), val = bool(false)]; bool var_4966_transpose_y_0 = const()[name = string("op_4966_transpose_y_0"), val = bool(false)]; tensor var_4965_cast_fp16 = transpose(perm = var_4965_perm_0, x = q_65_cast_fp16)[name = string("transpose_35")]; tensor var_4966_cast_fp16 = matmul(transpose_x = var_4966_transpose_x_0, transpose_y = var_4966_transpose_y_0, x = var_4965_cast_fp16, y = kc_43_cast_fp16)[name = string("op_4966_cast_fp16")]; fp16 _inversed_aw_81_y_0_to_fp16 = const()[name = string("_inversed_aw_81_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_81_cast_fp16 = mul(x = var_4966_cast_fp16, y = _inversed_aw_81_y_0_to_fp16)[name = string("_inversed_aw_81_cast_fp16")]; tensor aw_83_cast_fp16 = add(x = _inversed_aw_81_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_83_cast_fp16")]; int32 var_4980 = const()[name = string("op_4980"), val = int32(-1)]; tensor aw_87_cast_fp16 = softmax(axis = var_4980, x = aw_83_cast_fp16)[name = string("aw_87_cast_fp16")]; bool var_4986_transpose_x_1 = const()[name = string("op_4986_transpose_x_1"), val = bool(false)]; bool var_4986_transpose_y_1 = const()[name = string("op_4986_transpose_y_1"), val = bool(true)]; tensor var_4986_cast_fp16 = matmul(transpose_x = var_4986_transpose_x_1, transpose_y = var_4986_transpose_y_1, x = aw_87_cast_fp16, y = vc_43_cast_fp16)[name = string("op_4986_cast_fp16")]; tensor var_4989_perm_0 = const()[name = string("op_4989_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_4993 = const()[name = string("op_4993"), val = tensor([1, 1, -1])]; tensor var_4989_cast_fp16 = transpose(perm = var_4989_perm_0, x = var_4986_cast_fp16)[name = string("transpose_34")]; tensor input_103_cast_fp16 = reshape(shape = var_4993, x = var_4989_cast_fp16)[name = string("input_103_cast_fp16")]; tensor layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161591552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163688768))))[name = string("layers_10_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor var_4999_axes_0 = const()[name = string("op_4999_axes_0"), val = tensor([0])]; tensor var_4999_cast_fp16 = squeeze(axes = var_4999_axes_0, x = linear_73_cast_fp16)[name = string("op_4999_cast_fp16")]; tensor var_5001_axes_0 = const()[name = string("op_5001_axes_0"), val = tensor([0])]; tensor var_5001_cast_fp16 = squeeze(axes = var_5001_axes_0, x = var_4999_cast_fp16)[name = string("op_5001_cast_fp16")]; tensor var_5003_axes_0 = const()[name = string("op_5003_axes_0"), val = tensor([-1])]; tensor var_5003_cast_fp16 = expand_dims(axes = var_5003_axes_0, x = var_5001_cast_fp16)[name = string("op_5003_cast_fp16")]; tensor ao_21_axes_0 = const()[name = string("ao_21_axes_0"), val = tensor([-1])]; tensor ao_21_cast_fp16 = expand_dims(axes = ao_21_axes_0, x = var_5003_cast_fp16)[name = string("ao_21_cast_fp16")]; tensor hidden_41_cast_fp16 = add(x = hidden_39_cast_fp16, y = ao_21_cast_fp16)[name = string("hidden_41_cast_fp16")]; tensor var_5009_axes_0 = const()[name = string("op_5009_axes_0"), val = tensor([-1])]; tensor var_5009_cast_fp16 = squeeze(axes = var_5009_axes_0, x = hidden_41_cast_fp16)[name = string("op_5009_cast_fp16")]; tensor var_5011_axes_0 = const()[name = string("op_5011_axes_0"), val = tensor([-1])]; tensor var_5011_cast_fp16 = squeeze(axes = var_5011_axes_0, x = var_5009_cast_fp16)[name = string("op_5011_cast_fp16")]; tensor hidden_states_173_axes_0 = const()[name = string("hidden_states_173_axes_0"), val = tensor([0])]; tensor hidden_states_173_cast_fp16 = expand_dims(axes = hidden_states_173_axes_0, x = var_5011_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; fp16 var_5017_promoted_to_fp16 = const()[name = string("op_5017_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5023_cast_fp16 = pow(x = hidden_states_173_cast_fp16, y = var_5017_promoted_to_fp16)[name = string("op_5023_cast_fp16")]; tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_5023_cast_fp16)[name = string("variance_87_cast_fp16")]; tensor const_110_to_fp16 = const()[name = string("const_110_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689344)))]; tensor var_5027_cast_fp16 = mul(x = const_110_to_fp16, y = hidden_states_173_cast_fp16)[name = string("op_5027_cast_fp16")]; fp16 var_5028_to_fp16 = const()[name = string("op_5028_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5029_cast_fp16 = add(x = variance_87_cast_fp16, y = var_5028_to_fp16)[name = string("op_5029_cast_fp16")]; fp32 var_5030_epsilon_0 = const()[name = string("op_5030_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5030_cast_fp16 = rsqrt(epsilon = var_5030_epsilon_0, x = var_5029_cast_fp16)[name = string("op_5030_cast_fp16")]; tensor input_105_cast_fp16 = mul(x = var_5027_cast_fp16, y = var_5030_cast_fp16)[name = string("input_105_cast_fp16")]; tensor layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163691456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837248))))[name = string("layers_10_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_5038_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_5038_cast_fp16")]; tensor layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166837824))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169983616))))[name = string("layers_10_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_10_mlp_up_proj_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_109_cast_fp16 = mul(x = var_5038_cast_fp16, y = linear_75_cast_fp16)[name = string("input_109_cast_fp16")]; tensor layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169984192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173129984))))[name = string("layers_10_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor var_5045_axes_0 = const()[name = string("op_5045_axes_0"), val = tensor([0])]; tensor var_5045_cast_fp16 = squeeze(axes = var_5045_axes_0, x = linear_76_cast_fp16)[name = string("op_5045_cast_fp16")]; tensor var_5047_axes_0 = const()[name = string("op_5047_axes_0"), val = tensor([0])]; tensor var_5047_cast_fp16 = squeeze(axes = var_5047_axes_0, x = var_5045_cast_fp16)[name = string("op_5047_cast_fp16")]; tensor var_5049_axes_0 = const()[name = string("op_5049_axes_0"), val = tensor([-1])]; tensor var_5049_cast_fp16 = expand_dims(axes = var_5049_axes_0, x = var_5047_cast_fp16)[name = string("op_5049_cast_fp16")]; tensor h_21_axes_0 = const()[name = string("h_21_axes_0"), val = tensor([-1])]; tensor h_21_cast_fp16 = expand_dims(axes = h_21_axes_0, x = var_5049_cast_fp16)[name = string("h_21_cast_fp16")]; tensor hidden_43_cast_fp16 = add(x = hidden_41_cast_fp16, y = h_21_cast_fp16)[name = string("hidden_43_cast_fp16")]; tensor var_5063_begin_0 = const()[name = string("op_5063_begin_0"), val = tensor([0, 11264, 0, 0])]; tensor var_5063_end_0 = const()[name = string("op_5063_end_0"), val = tensor([1, 12288, 1, 256])]; tensor var_5063_end_mask_0 = const()[name = string("op_5063_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5063_cast_fp16 = slice_by_index(begin = var_5063_begin_0, end = var_5063_end_0, end_mask = var_5063_end_mask_0, x = key_cache)[name = string("op_5063_cast_fp16")]; tensor var_5083_begin_0 = const()[name = string("op_5083_begin_0"), val = tensor([0, 11264, 0, 0])]; tensor var_5083_end_0 = const()[name = string("op_5083_end_0"), val = tensor([1, 12288, 1, 256])]; tensor var_5083_end_mask_0 = const()[name = string("op_5083_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5083_cast_fp16 = slice_by_index(begin = var_5083_begin_0, end = var_5083_end_0, end_mask = var_5083_end_mask_0, x = value_cache)[name = string("op_5083_cast_fp16")]; tensor var_5095_axes_0 = const()[name = string("op_5095_axes_0"), val = tensor([-1])]; tensor var_5095_cast_fp16 = squeeze(axes = var_5095_axes_0, x = hidden_43_cast_fp16)[name = string("op_5095_cast_fp16")]; tensor var_5097_axes_0 = const()[name = string("op_5097_axes_0"), val = tensor([-1])]; tensor var_5097_cast_fp16 = squeeze(axes = var_5097_axes_0, x = var_5095_cast_fp16)[name = string("op_5097_cast_fp16")]; tensor hidden_states_177_axes_0 = const()[name = string("hidden_states_177_axes_0"), val = tensor([0])]; tensor hidden_states_177_cast_fp16 = expand_dims(axes = hidden_states_177_axes_0, x = var_5097_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; fp16 var_5103_promoted_to_fp16 = const()[name = string("op_5103_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5109_cast_fp16 = pow(x = hidden_states_177_cast_fp16, y = var_5103_promoted_to_fp16)[name = string("op_5109_cast_fp16")]; tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_5109_cast_fp16)[name = string("variance_89_cast_fp16")]; tensor const_111_to_fp16 = const()[name = string("const_111_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173130560)))]; tensor var_5113_cast_fp16 = mul(x = const_111_to_fp16, y = hidden_states_177_cast_fp16)[name = string("op_5113_cast_fp16")]; fp16 var_5114_to_fp16 = const()[name = string("op_5114_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5115_cast_fp16 = add(x = variance_89_cast_fp16, y = var_5114_to_fp16)[name = string("op_5115_cast_fp16")]; fp32 var_5116_epsilon_0 = const()[name = string("op_5116_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5116_cast_fp16 = rsqrt(epsilon = var_5116_epsilon_0, x = var_5115_cast_fp16)[name = string("op_5116_cast_fp16")]; tensor input_111_cast_fp16 = mul(x = var_5113_cast_fp16, y = var_5116_cast_fp16)[name = string("input_111_cast_fp16")]; tensor layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173132672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175229888))))[name = string("layers_11_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor var_5125 = const()[name = string("op_5125"), val = tensor([1, 1, 16, 128])]; tensor var_5126_cast_fp16 = reshape(shape = var_5125, x = linear_77_cast_fp16)[name = string("op_5126_cast_fp16")]; tensor layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(175230464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279104))))[name = string("layers_11_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor var_5137 = const()[name = string("op_5137"), val = tensor([1, 1, 8, 128])]; tensor var_5138_cast_fp16 = reshape(shape = var_5137, x = linear_78_cast_fp16)[name = string("op_5138_cast_fp16")]; tensor layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176279680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328320))))[name = string("layers_11_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("linear_79_cast_fp16")]; fp16 var_5157_promoted_to_fp16 = const()[name = string("op_5157_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5163_cast_fp16 = pow(x = var_5126_cast_fp16, y = var_5157_promoted_to_fp16)[name = string("op_5163_cast_fp16")]; bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; tensor const_326 = const()[name = string("const_326"), val = tensor([3])]; tensor variance_91_cast_fp16 = reduce_mean(axes = const_326, keep_dims = variance_91_keep_dims_0, x = var_5163_cast_fp16)[name = string("variance_91_cast_fp16")]; tensor const_327_to_fp16 = const()[name = string("const_327_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177328896)))]; tensor var_5167_cast_fp16 = mul(x = const_327_to_fp16, y = var_5126_cast_fp16)[name = string("op_5167_cast_fp16")]; fp16 var_5168_to_fp16 = const()[name = string("op_5168_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5169_cast_fp16 = add(x = variance_91_cast_fp16, y = var_5168_to_fp16)[name = string("op_5169_cast_fp16")]; fp32 var_5170_epsilon_0 = const()[name = string("op_5170_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5170_cast_fp16 = rsqrt(epsilon = var_5170_epsilon_0, x = var_5169_cast_fp16)[name = string("op_5170_cast_fp16")]; tensor q_67_cast_fp16 = mul(x = var_5167_cast_fp16, y = var_5170_cast_fp16)[name = string("q_67_cast_fp16")]; fp16 var_5175_promoted_to_fp16 = const()[name = string("op_5175_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5181_cast_fp16 = pow(x = var_5138_cast_fp16, y = var_5175_promoted_to_fp16)[name = string("op_5181_cast_fp16")]; bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; tensor const_328 = const()[name = string("const_328"), val = tensor([3])]; tensor variance_93_cast_fp16 = reduce_mean(axes = const_328, keep_dims = variance_93_keep_dims_0, x = var_5181_cast_fp16)[name = string("variance_93_cast_fp16")]; tensor const_329_to_fp16 = const()[name = string("const_329_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329216)))]; tensor var_5185_cast_fp16 = mul(x = const_329_to_fp16, y = var_5138_cast_fp16)[name = string("op_5185_cast_fp16")]; fp16 var_5186_to_fp16 = const()[name = string("op_5186_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5187_cast_fp16 = add(x = variance_93_cast_fp16, y = var_5186_to_fp16)[name = string("op_5187_cast_fp16")]; fp32 var_5188_epsilon_0 = const()[name = string("op_5188_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5188_cast_fp16 = rsqrt(epsilon = var_5188_epsilon_0, x = var_5187_cast_fp16)[name = string("op_5188_cast_fp16")]; tensor k_67_cast_fp16 = mul(x = var_5185_cast_fp16, y = var_5188_cast_fp16)[name = string("k_67_cast_fp16")]; tensor var_5203_cast_fp16 = mul(x = q_67_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5203_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_67_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_67_cast_fp16)[name = string("x2_45_cast_fp16")]; fp16 const_116_promoted_to_fp16 = const()[name = string("const_116_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5224_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_116_promoted_to_fp16)[name = string("op_5224_cast_fp16")]; int32 var_5226 = const()[name = string("op_5226"), val = int32(-1)]; bool var_5227_interleave_0 = const()[name = string("op_5227_interleave_0"), val = bool(false)]; tensor var_5227_cast_fp16 = concat(axis = var_5226, interleave = var_5227_interleave_0, values = (var_5224_cast_fp16, x1_45_cast_fp16))[name = string("op_5227_cast_fp16")]; tensor var_5228_cast_fp16 = mul(x = var_5227_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5228_cast_fp16")]; tensor q_71_cast_fp16 = add(x = var_5203_cast_fp16, y = var_5228_cast_fp16)[name = string("q_71_cast_fp16")]; tensor var_5231_cast_fp16 = mul(x = k_67_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5231_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_67_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_67_cast_fp16)[name = string("x2_47_cast_fp16")]; fp16 const_119_promoted_to_fp16 = const()[name = string("const_119_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5252_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_119_promoted_to_fp16)[name = string("op_5252_cast_fp16")]; int32 var_5254 = const()[name = string("op_5254"), val = int32(-1)]; bool var_5255_interleave_0 = const()[name = string("op_5255_interleave_0"), val = bool(false)]; tensor var_5255_cast_fp16 = concat(axis = var_5254, interleave = var_5255_interleave_0, values = (var_5252_cast_fp16, x1_47_cast_fp16))[name = string("op_5255_cast_fp16")]; tensor var_5256_cast_fp16 = mul(x = var_5255_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5256_cast_fp16")]; tensor k_71_cast_fp16 = add(x = var_5231_cast_fp16, y = var_5256_cast_fp16)[name = string("k_71_cast_fp16")]; tensor var_5263 = const()[name = string("op_5263"), val = tensor([1, 1024, 1, 1])]; tensor nk_23_cast_fp16 = reshape(shape = var_5263, x = k_71_cast_fp16)[name = string("nk_23_cast_fp16")]; tensor var_5269 = const()[name = string("op_5269"), val = tensor([1, 1024, 1, 1])]; tensor nv_23_cast_fp16 = reshape(shape = var_5269, x = linear_79_cast_fp16)[name = string("nv_23_cast_fp16")]; tensor var_5274_cast_fp16 = mul(x = var_5063_cast_fp16, y = var_1203_cast_fp16)[name = string("op_5274_cast_fp16")]; tensor var_5275_cast_fp16 = mul(x = nk_23_cast_fp16, y = update_mask_cast_fp16)[name = string("op_5275_cast_fp16")]; tensor lkc_47_cast_fp16 = add(x = var_5274_cast_fp16, y = var_5275_cast_fp16)[name = string("lkc_47_cast_fp16")]; tensor var_5281_cast_fp16 = mul(x = var_5083_cast_fp16, y = var_1203_cast_fp16)[name = string("op_5281_cast_fp16")]; tensor var_5282_cast_fp16 = mul(x = nv_23_cast_fp16, y = update_mask_cast_fp16)[name = string("op_5282_cast_fp16")]; tensor lvc_47_cast_fp16 = add(x = var_5281_cast_fp16, y = var_5282_cast_fp16)[name = string("lvc_47_cast_fp16")]; tensor var_5286_axes_0 = const()[name = string("op_5286_axes_0"), val = tensor([2])]; tensor var_5286_cast_fp16 = squeeze(axes = var_5286_axes_0, x = lkc_47_cast_fp16)[name = string("op_5286_cast_fp16")]; tensor var_5291 = const()[name = string("op_5291"), val = tensor([1, 8, 128, 256])]; tensor kc_45_cast_fp16 = reshape(shape = var_5291, x = var_5286_cast_fp16)[name = string("kc_45_cast_fp16")]; tensor var_5294_axes_0 = const()[name = string("op_5294_axes_0"), val = tensor([2])]; tensor var_5294_cast_fp16 = squeeze(axes = var_5294_axes_0, x = lvc_47_cast_fp16)[name = string("op_5294_cast_fp16")]; tensor var_5299 = const()[name = string("op_5299"), val = tensor([1, 8, 128, 256])]; tensor vc_45_cast_fp16 = reshape(shape = var_5299, x = var_5294_cast_fp16)[name = string("vc_45_cast_fp16")]; tensor var_5302_axes_0 = const()[name = string("op_5302_axes_0"), val = tensor([2])]; tensor var_5302_cast_fp16 = expand_dims(axes = var_5302_axes_0, x = kc_45_cast_fp16)[name = string("op_5302_cast_fp16")]; tensor var_5310_reps_0 = const()[name = string("op_5310_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5310_cast_fp16 = tile(reps = var_5310_reps_0, x = var_5302_cast_fp16)[name = string("op_5310_cast_fp16")]; tensor var_5315 = const()[name = string("op_5315"), val = tensor([1, 16, 128, 256])]; tensor kc_47_cast_fp16 = reshape(shape = var_5315, x = var_5310_cast_fp16)[name = string("kc_47_cast_fp16")]; tensor var_5318_axes_0 = const()[name = string("op_5318_axes_0"), val = tensor([2])]; tensor var_5318_cast_fp16 = expand_dims(axes = var_5318_axes_0, x = vc_45_cast_fp16)[name = string("op_5318_cast_fp16")]; tensor var_5326_reps_0 = const()[name = string("op_5326_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5326_cast_fp16 = tile(reps = var_5326_reps_0, x = var_5318_cast_fp16)[name = string("op_5326_cast_fp16")]; tensor var_5331 = const()[name = string("op_5331"), val = tensor([1, 16, 128, 256])]; tensor vc_47_cast_fp16 = reshape(shape = var_5331, x = var_5326_cast_fp16)[name = string("vc_47_cast_fp16")]; tensor var_5335_perm_0 = const()[name = string("op_5335_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_5336_transpose_x_0 = const()[name = string("op_5336_transpose_x_0"), val = bool(false)]; bool var_5336_transpose_y_0 = const()[name = string("op_5336_transpose_y_0"), val = bool(false)]; tensor var_5335_cast_fp16 = transpose(perm = var_5335_perm_0, x = q_71_cast_fp16)[name = string("transpose_33")]; tensor var_5336_cast_fp16 = matmul(transpose_x = var_5336_transpose_x_0, transpose_y = var_5336_transpose_y_0, x = var_5335_cast_fp16, y = kc_47_cast_fp16)[name = string("op_5336_cast_fp16")]; fp16 _inversed_aw_89_y_0_to_fp16 = const()[name = string("_inversed_aw_89_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_89_cast_fp16 = mul(x = var_5336_cast_fp16, y = _inversed_aw_89_y_0_to_fp16)[name = string("_inversed_aw_89_cast_fp16")]; tensor aw_91_cast_fp16 = add(x = _inversed_aw_89_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_91_cast_fp16")]; int32 var_5350 = const()[name = string("op_5350"), val = int32(-1)]; tensor aw_95_cast_fp16 = softmax(axis = var_5350, x = aw_91_cast_fp16)[name = string("aw_95_cast_fp16")]; bool var_5356_transpose_x_1 = const()[name = string("op_5356_transpose_x_1"), val = bool(false)]; bool var_5356_transpose_y_1 = const()[name = string("op_5356_transpose_y_1"), val = bool(true)]; tensor var_5356_cast_fp16 = matmul(transpose_x = var_5356_transpose_x_1, transpose_y = var_5356_transpose_y_1, x = aw_95_cast_fp16, y = vc_47_cast_fp16)[name = string("op_5356_cast_fp16")]; tensor var_5359_perm_0 = const()[name = string("op_5359_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5363 = const()[name = string("op_5363"), val = tensor([1, 1, -1])]; tensor var_5359_cast_fp16 = transpose(perm = var_5359_perm_0, x = var_5356_cast_fp16)[name = string("transpose_32")]; tensor input_113_cast_fp16 = reshape(shape = var_5363, x = var_5359_cast_fp16)[name = string("input_113_cast_fp16")]; tensor layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177329536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179426752))))[name = string("layers_11_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor var_5369_axes_0 = const()[name = string("op_5369_axes_0"), val = tensor([0])]; tensor var_5369_cast_fp16 = squeeze(axes = var_5369_axes_0, x = linear_80_cast_fp16)[name = string("op_5369_cast_fp16")]; tensor var_5371_axes_0 = const()[name = string("op_5371_axes_0"), val = tensor([0])]; tensor var_5371_cast_fp16 = squeeze(axes = var_5371_axes_0, x = var_5369_cast_fp16)[name = string("op_5371_cast_fp16")]; tensor var_5373_axes_0 = const()[name = string("op_5373_axes_0"), val = tensor([-1])]; tensor var_5373_cast_fp16 = expand_dims(axes = var_5373_axes_0, x = var_5371_cast_fp16)[name = string("op_5373_cast_fp16")]; tensor ao_23_axes_0 = const()[name = string("ao_23_axes_0"), val = tensor([-1])]; tensor ao_23_cast_fp16 = expand_dims(axes = ao_23_axes_0, x = var_5373_cast_fp16)[name = string("ao_23_cast_fp16")]; tensor hidden_45_cast_fp16 = add(x = hidden_43_cast_fp16, y = ao_23_cast_fp16)[name = string("hidden_45_cast_fp16")]; tensor var_5379_axes_0 = const()[name = string("op_5379_axes_0"), val = tensor([-1])]; tensor var_5379_cast_fp16 = squeeze(axes = var_5379_axes_0, x = hidden_45_cast_fp16)[name = string("op_5379_cast_fp16")]; tensor var_5381_axes_0 = const()[name = string("op_5381_axes_0"), val = tensor([-1])]; tensor var_5381_cast_fp16 = squeeze(axes = var_5381_axes_0, x = var_5379_cast_fp16)[name = string("op_5381_cast_fp16")]; tensor hidden_states_189_axes_0 = const()[name = string("hidden_states_189_axes_0"), val = tensor([0])]; tensor hidden_states_189_cast_fp16 = expand_dims(axes = hidden_states_189_axes_0, x = var_5381_cast_fp16)[name = string("hidden_states_189_cast_fp16")]; fp16 var_5387_promoted_to_fp16 = const()[name = string("op_5387_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5393_cast_fp16 = pow(x = hidden_states_189_cast_fp16, y = var_5387_promoted_to_fp16)[name = string("op_5393_cast_fp16")]; tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_5393_cast_fp16)[name = string("variance_95_cast_fp16")]; tensor const_120_to_fp16 = const()[name = string("const_120_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179427328)))]; tensor var_5397_cast_fp16 = mul(x = const_120_to_fp16, y = hidden_states_189_cast_fp16)[name = string("op_5397_cast_fp16")]; fp16 var_5398_to_fp16 = const()[name = string("op_5398_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5399_cast_fp16 = add(x = variance_95_cast_fp16, y = var_5398_to_fp16)[name = string("op_5399_cast_fp16")]; fp32 var_5400_epsilon_0 = const()[name = string("op_5400_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5400_cast_fp16 = rsqrt(epsilon = var_5400_epsilon_0, x = var_5399_cast_fp16)[name = string("op_5400_cast_fp16")]; tensor input_115_cast_fp16 = mul(x = var_5397_cast_fp16, y = var_5400_cast_fp16)[name = string("input_115_cast_fp16")]; tensor layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179429440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575232))))[name = string("layers_11_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_5408_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_5408_cast_fp16")]; tensor layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182575808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185721600))))[name = string("layers_11_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_11_mlp_up_proj_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_119_cast_fp16 = mul(x = var_5408_cast_fp16, y = linear_82_cast_fp16)[name = string("input_119_cast_fp16")]; tensor layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185722176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188867968))))[name = string("layers_11_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor var_5415_axes_0 = const()[name = string("op_5415_axes_0"), val = tensor([0])]; tensor var_5415_cast_fp16 = squeeze(axes = var_5415_axes_0, x = linear_83_cast_fp16)[name = string("op_5415_cast_fp16")]; tensor var_5417_axes_0 = const()[name = string("op_5417_axes_0"), val = tensor([0])]; tensor var_5417_cast_fp16 = squeeze(axes = var_5417_axes_0, x = var_5415_cast_fp16)[name = string("op_5417_cast_fp16")]; tensor var_5419_axes_0 = const()[name = string("op_5419_axes_0"), val = tensor([-1])]; tensor var_5419_cast_fp16 = expand_dims(axes = var_5419_axes_0, x = var_5417_cast_fp16)[name = string("op_5419_cast_fp16")]; tensor h_23_axes_0 = const()[name = string("h_23_axes_0"), val = tensor([-1])]; tensor h_23_cast_fp16 = expand_dims(axes = h_23_axes_0, x = var_5419_cast_fp16)[name = string("h_23_cast_fp16")]; tensor hidden_47_cast_fp16 = add(x = hidden_45_cast_fp16, y = h_23_cast_fp16)[name = string("hidden_47_cast_fp16")]; tensor var_5433_begin_0 = const()[name = string("op_5433_begin_0"), val = tensor([0, 12288, 0, 0])]; tensor var_5433_end_0 = const()[name = string("op_5433_end_0"), val = tensor([1, 13312, 1, 256])]; tensor var_5433_end_mask_0 = const()[name = string("op_5433_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5433_cast_fp16 = slice_by_index(begin = var_5433_begin_0, end = var_5433_end_0, end_mask = var_5433_end_mask_0, x = key_cache)[name = string("op_5433_cast_fp16")]; tensor var_5453_begin_0 = const()[name = string("op_5453_begin_0"), val = tensor([0, 12288, 0, 0])]; tensor var_5453_end_0 = const()[name = string("op_5453_end_0"), val = tensor([1, 13312, 1, 256])]; tensor var_5453_end_mask_0 = const()[name = string("op_5453_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5453_cast_fp16 = slice_by_index(begin = var_5453_begin_0, end = var_5453_end_0, end_mask = var_5453_end_mask_0, x = value_cache)[name = string("op_5453_cast_fp16")]; tensor var_5465_axes_0 = const()[name = string("op_5465_axes_0"), val = tensor([-1])]; tensor var_5465_cast_fp16 = squeeze(axes = var_5465_axes_0, x = hidden_47_cast_fp16)[name = string("op_5465_cast_fp16")]; tensor var_5467_axes_0 = const()[name = string("op_5467_axes_0"), val = tensor([-1])]; tensor var_5467_cast_fp16 = squeeze(axes = var_5467_axes_0, x = var_5465_cast_fp16)[name = string("op_5467_cast_fp16")]; tensor hidden_states_193_axes_0 = const()[name = string("hidden_states_193_axes_0"), val = tensor([0])]; tensor hidden_states_193_cast_fp16 = expand_dims(axes = hidden_states_193_axes_0, x = var_5467_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; fp16 var_5473_promoted_to_fp16 = const()[name = string("op_5473_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5479_cast_fp16 = pow(x = hidden_states_193_cast_fp16, y = var_5473_promoted_to_fp16)[name = string("op_5479_cast_fp16")]; tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_5479_cast_fp16)[name = string("variance_97_cast_fp16")]; tensor const_121_to_fp16 = const()[name = string("const_121_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188868544)))]; tensor var_5483_cast_fp16 = mul(x = const_121_to_fp16, y = hidden_states_193_cast_fp16)[name = string("op_5483_cast_fp16")]; fp16 var_5484_to_fp16 = const()[name = string("op_5484_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5485_cast_fp16 = add(x = variance_97_cast_fp16, y = var_5484_to_fp16)[name = string("op_5485_cast_fp16")]; fp32 var_5486_epsilon_0 = const()[name = string("op_5486_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5486_cast_fp16 = rsqrt(epsilon = var_5486_epsilon_0, x = var_5485_cast_fp16)[name = string("op_5486_cast_fp16")]; tensor input_121_cast_fp16 = mul(x = var_5483_cast_fp16, y = var_5486_cast_fp16)[name = string("input_121_cast_fp16")]; tensor layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188870656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190967872))))[name = string("layers_12_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor var_5495 = const()[name = string("op_5495"), val = tensor([1, 1, 16, 128])]; tensor var_5496_cast_fp16 = reshape(shape = var_5495, x = linear_84_cast_fp16)[name = string("op_5496_cast_fp16")]; tensor layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190968448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017088))))[name = string("layers_12_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor var_5507 = const()[name = string("op_5507"), val = tensor([1, 1, 8, 128])]; tensor var_5508_cast_fp16 = reshape(shape = var_5507, x = linear_85_cast_fp16)[name = string("op_5508_cast_fp16")]; tensor layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192017664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066304))))[name = string("layers_12_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("linear_86_cast_fp16")]; fp16 var_5527_promoted_to_fp16 = const()[name = string("op_5527_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5533_cast_fp16 = pow(x = var_5496_cast_fp16, y = var_5527_promoted_to_fp16)[name = string("op_5533_cast_fp16")]; bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; tensor const_330 = const()[name = string("const_330"), val = tensor([3])]; tensor variance_99_cast_fp16 = reduce_mean(axes = const_330, keep_dims = variance_99_keep_dims_0, x = var_5533_cast_fp16)[name = string("variance_99_cast_fp16")]; tensor const_331_to_fp16 = const()[name = string("const_331_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193066880)))]; tensor var_5537_cast_fp16 = mul(x = const_331_to_fp16, y = var_5496_cast_fp16)[name = string("op_5537_cast_fp16")]; fp16 var_5538_to_fp16 = const()[name = string("op_5538_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5539_cast_fp16 = add(x = variance_99_cast_fp16, y = var_5538_to_fp16)[name = string("op_5539_cast_fp16")]; fp32 var_5540_epsilon_0 = const()[name = string("op_5540_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5540_cast_fp16 = rsqrt(epsilon = var_5540_epsilon_0, x = var_5539_cast_fp16)[name = string("op_5540_cast_fp16")]; tensor q_73_cast_fp16 = mul(x = var_5537_cast_fp16, y = var_5540_cast_fp16)[name = string("q_73_cast_fp16")]; fp16 var_5545_promoted_to_fp16 = const()[name = string("op_5545_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5551_cast_fp16 = pow(x = var_5508_cast_fp16, y = var_5545_promoted_to_fp16)[name = string("op_5551_cast_fp16")]; bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; tensor const_332 = const()[name = string("const_332"), val = tensor([3])]; tensor variance_101_cast_fp16 = reduce_mean(axes = const_332, keep_dims = variance_101_keep_dims_0, x = var_5551_cast_fp16)[name = string("variance_101_cast_fp16")]; tensor const_333_to_fp16 = const()[name = string("const_333_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067200)))]; tensor var_5555_cast_fp16 = mul(x = const_333_to_fp16, y = var_5508_cast_fp16)[name = string("op_5555_cast_fp16")]; fp16 var_5556_to_fp16 = const()[name = string("op_5556_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5557_cast_fp16 = add(x = variance_101_cast_fp16, y = var_5556_to_fp16)[name = string("op_5557_cast_fp16")]; fp32 var_5558_epsilon_0 = const()[name = string("op_5558_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5558_cast_fp16 = rsqrt(epsilon = var_5558_epsilon_0, x = var_5557_cast_fp16)[name = string("op_5558_cast_fp16")]; tensor k_73_cast_fp16 = mul(x = var_5555_cast_fp16, y = var_5558_cast_fp16)[name = string("k_73_cast_fp16")]; tensor var_5573_cast_fp16 = mul(x = q_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5573_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_73_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_73_cast_fp16)[name = string("x2_49_cast_fp16")]; fp16 const_126_promoted_to_fp16 = const()[name = string("const_126_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5594_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_126_promoted_to_fp16)[name = string("op_5594_cast_fp16")]; int32 var_5596 = const()[name = string("op_5596"), val = int32(-1)]; bool var_5597_interleave_0 = const()[name = string("op_5597_interleave_0"), val = bool(false)]; tensor var_5597_cast_fp16 = concat(axis = var_5596, interleave = var_5597_interleave_0, values = (var_5594_cast_fp16, x1_49_cast_fp16))[name = string("op_5597_cast_fp16")]; tensor var_5598_cast_fp16 = mul(x = var_5597_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5598_cast_fp16")]; tensor q_77_cast_fp16 = add(x = var_5573_cast_fp16, y = var_5598_cast_fp16)[name = string("q_77_cast_fp16")]; tensor var_5601_cast_fp16 = mul(x = k_73_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5601_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_73_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_73_cast_fp16)[name = string("x2_51_cast_fp16")]; fp16 const_129_promoted_to_fp16 = const()[name = string("const_129_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5622_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_129_promoted_to_fp16)[name = string("op_5622_cast_fp16")]; int32 var_5624 = const()[name = string("op_5624"), val = int32(-1)]; bool var_5625_interleave_0 = const()[name = string("op_5625_interleave_0"), val = bool(false)]; tensor var_5625_cast_fp16 = concat(axis = var_5624, interleave = var_5625_interleave_0, values = (var_5622_cast_fp16, x1_51_cast_fp16))[name = string("op_5625_cast_fp16")]; tensor var_5626_cast_fp16 = mul(x = var_5625_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5626_cast_fp16")]; tensor k_77_cast_fp16 = add(x = var_5601_cast_fp16, y = var_5626_cast_fp16)[name = string("k_77_cast_fp16")]; tensor var_5633 = const()[name = string("op_5633"), val = tensor([1, 1024, 1, 1])]; tensor nk_25_cast_fp16 = reshape(shape = var_5633, x = k_77_cast_fp16)[name = string("nk_25_cast_fp16")]; tensor var_5639 = const()[name = string("op_5639"), val = tensor([1, 1024, 1, 1])]; tensor nv_25_cast_fp16 = reshape(shape = var_5639, x = linear_86_cast_fp16)[name = string("nv_25_cast_fp16")]; tensor var_5644_cast_fp16 = mul(x = var_5433_cast_fp16, y = var_1203_cast_fp16)[name = string("op_5644_cast_fp16")]; tensor var_5645_cast_fp16 = mul(x = nk_25_cast_fp16, y = update_mask_cast_fp16)[name = string("op_5645_cast_fp16")]; tensor lkc_51_cast_fp16 = add(x = var_5644_cast_fp16, y = var_5645_cast_fp16)[name = string("lkc_51_cast_fp16")]; tensor var_5651_cast_fp16 = mul(x = var_5453_cast_fp16, y = var_1203_cast_fp16)[name = string("op_5651_cast_fp16")]; tensor var_5652_cast_fp16 = mul(x = nv_25_cast_fp16, y = update_mask_cast_fp16)[name = string("op_5652_cast_fp16")]; tensor lvc_51_cast_fp16 = add(x = var_5651_cast_fp16, y = var_5652_cast_fp16)[name = string("lvc_51_cast_fp16")]; tensor var_5656_axes_0 = const()[name = string("op_5656_axes_0"), val = tensor([2])]; tensor var_5656_cast_fp16 = squeeze(axes = var_5656_axes_0, x = lkc_51_cast_fp16)[name = string("op_5656_cast_fp16")]; tensor var_5661 = const()[name = string("op_5661"), val = tensor([1, 8, 128, 256])]; tensor kc_49_cast_fp16 = reshape(shape = var_5661, x = var_5656_cast_fp16)[name = string("kc_49_cast_fp16")]; tensor var_5664_axes_0 = const()[name = string("op_5664_axes_0"), val = tensor([2])]; tensor var_5664_cast_fp16 = squeeze(axes = var_5664_axes_0, x = lvc_51_cast_fp16)[name = string("op_5664_cast_fp16")]; tensor var_5669 = const()[name = string("op_5669"), val = tensor([1, 8, 128, 256])]; tensor vc_49_cast_fp16 = reshape(shape = var_5669, x = var_5664_cast_fp16)[name = string("vc_49_cast_fp16")]; tensor var_5672_axes_0 = const()[name = string("op_5672_axes_0"), val = tensor([2])]; tensor var_5672_cast_fp16 = expand_dims(axes = var_5672_axes_0, x = kc_49_cast_fp16)[name = string("op_5672_cast_fp16")]; tensor var_5680_reps_0 = const()[name = string("op_5680_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5680_cast_fp16 = tile(reps = var_5680_reps_0, x = var_5672_cast_fp16)[name = string("op_5680_cast_fp16")]; tensor var_5685 = const()[name = string("op_5685"), val = tensor([1, 16, 128, 256])]; tensor kc_51_cast_fp16 = reshape(shape = var_5685, x = var_5680_cast_fp16)[name = string("kc_51_cast_fp16")]; tensor var_5688_axes_0 = const()[name = string("op_5688_axes_0"), val = tensor([2])]; tensor var_5688_cast_fp16 = expand_dims(axes = var_5688_axes_0, x = vc_49_cast_fp16)[name = string("op_5688_cast_fp16")]; tensor var_5696_reps_0 = const()[name = string("op_5696_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_5696_cast_fp16 = tile(reps = var_5696_reps_0, x = var_5688_cast_fp16)[name = string("op_5696_cast_fp16")]; tensor var_5701 = const()[name = string("op_5701"), val = tensor([1, 16, 128, 256])]; tensor vc_51_cast_fp16 = reshape(shape = var_5701, x = var_5696_cast_fp16)[name = string("vc_51_cast_fp16")]; tensor var_5705_perm_0 = const()[name = string("op_5705_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_5706_transpose_x_0 = const()[name = string("op_5706_transpose_x_0"), val = bool(false)]; bool var_5706_transpose_y_0 = const()[name = string("op_5706_transpose_y_0"), val = bool(false)]; tensor var_5705_cast_fp16 = transpose(perm = var_5705_perm_0, x = q_77_cast_fp16)[name = string("transpose_31")]; tensor var_5706_cast_fp16 = matmul(transpose_x = var_5706_transpose_x_0, transpose_y = var_5706_transpose_y_0, x = var_5705_cast_fp16, y = kc_51_cast_fp16)[name = string("op_5706_cast_fp16")]; fp16 _inversed_aw_97_y_0_to_fp16 = const()[name = string("_inversed_aw_97_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_97_cast_fp16 = mul(x = var_5706_cast_fp16, y = _inversed_aw_97_y_0_to_fp16)[name = string("_inversed_aw_97_cast_fp16")]; tensor aw_99_cast_fp16 = add(x = _inversed_aw_97_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_99_cast_fp16")]; int32 var_5720 = const()[name = string("op_5720"), val = int32(-1)]; tensor aw_103_cast_fp16 = softmax(axis = var_5720, x = aw_99_cast_fp16)[name = string("aw_103_cast_fp16")]; bool var_5726_transpose_x_1 = const()[name = string("op_5726_transpose_x_1"), val = bool(false)]; bool var_5726_transpose_y_1 = const()[name = string("op_5726_transpose_y_1"), val = bool(true)]; tensor var_5726_cast_fp16 = matmul(transpose_x = var_5726_transpose_x_1, transpose_y = var_5726_transpose_y_1, x = aw_103_cast_fp16, y = vc_51_cast_fp16)[name = string("op_5726_cast_fp16")]; tensor var_5729_perm_0 = const()[name = string("op_5729_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_5733 = const()[name = string("op_5733"), val = tensor([1, 1, -1])]; tensor var_5729_cast_fp16 = transpose(perm = var_5729_perm_0, x = var_5726_cast_fp16)[name = string("transpose_30")]; tensor input_123_cast_fp16 = reshape(shape = var_5733, x = var_5729_cast_fp16)[name = string("input_123_cast_fp16")]; tensor layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193067520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195164736))))[name = string("layers_12_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor var_5739_axes_0 = const()[name = string("op_5739_axes_0"), val = tensor([0])]; tensor var_5739_cast_fp16 = squeeze(axes = var_5739_axes_0, x = linear_87_cast_fp16)[name = string("op_5739_cast_fp16")]; tensor var_5741_axes_0 = const()[name = string("op_5741_axes_0"), val = tensor([0])]; tensor var_5741_cast_fp16 = squeeze(axes = var_5741_axes_0, x = var_5739_cast_fp16)[name = string("op_5741_cast_fp16")]; tensor var_5743_axes_0 = const()[name = string("op_5743_axes_0"), val = tensor([-1])]; tensor var_5743_cast_fp16 = expand_dims(axes = var_5743_axes_0, x = var_5741_cast_fp16)[name = string("op_5743_cast_fp16")]; tensor ao_25_axes_0 = const()[name = string("ao_25_axes_0"), val = tensor([-1])]; tensor ao_25_cast_fp16 = expand_dims(axes = ao_25_axes_0, x = var_5743_cast_fp16)[name = string("ao_25_cast_fp16")]; tensor hidden_49_cast_fp16 = add(x = hidden_47_cast_fp16, y = ao_25_cast_fp16)[name = string("hidden_49_cast_fp16")]; tensor var_5749_axes_0 = const()[name = string("op_5749_axes_0"), val = tensor([-1])]; tensor var_5749_cast_fp16 = squeeze(axes = var_5749_axes_0, x = hidden_49_cast_fp16)[name = string("op_5749_cast_fp16")]; tensor var_5751_axes_0 = const()[name = string("op_5751_axes_0"), val = tensor([-1])]; tensor var_5751_cast_fp16 = squeeze(axes = var_5751_axes_0, x = var_5749_cast_fp16)[name = string("op_5751_cast_fp16")]; tensor hidden_states_205_axes_0 = const()[name = string("hidden_states_205_axes_0"), val = tensor([0])]; tensor hidden_states_205_cast_fp16 = expand_dims(axes = hidden_states_205_axes_0, x = var_5751_cast_fp16)[name = string("hidden_states_205_cast_fp16")]; fp16 var_5757_promoted_to_fp16 = const()[name = string("op_5757_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5763_cast_fp16 = pow(x = hidden_states_205_cast_fp16, y = var_5757_promoted_to_fp16)[name = string("op_5763_cast_fp16")]; tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_5763_cast_fp16)[name = string("variance_103_cast_fp16")]; tensor const_130_to_fp16 = const()[name = string("const_130_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195165312)))]; tensor var_5767_cast_fp16 = mul(x = const_130_to_fp16, y = hidden_states_205_cast_fp16)[name = string("op_5767_cast_fp16")]; fp16 var_5768_to_fp16 = const()[name = string("op_5768_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5769_cast_fp16 = add(x = variance_103_cast_fp16, y = var_5768_to_fp16)[name = string("op_5769_cast_fp16")]; fp32 var_5770_epsilon_0 = const()[name = string("op_5770_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5770_cast_fp16 = rsqrt(epsilon = var_5770_epsilon_0, x = var_5769_cast_fp16)[name = string("op_5770_cast_fp16")]; tensor input_125_cast_fp16 = mul(x = var_5767_cast_fp16, y = var_5770_cast_fp16)[name = string("input_125_cast_fp16")]; tensor layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195167424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313216))))[name = string("layers_12_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = input_125_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_5778_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_5778_cast_fp16")]; tensor layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198313792))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201459584))))[name = string("layers_12_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_12_mlp_up_proj_weight_to_fp16_palettized, x = input_125_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_129_cast_fp16 = mul(x = var_5778_cast_fp16, y = linear_89_cast_fp16)[name = string("input_129_cast_fp16")]; tensor layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201460160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204605952))))[name = string("layers_12_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor var_5785_axes_0 = const()[name = string("op_5785_axes_0"), val = tensor([0])]; tensor var_5785_cast_fp16 = squeeze(axes = var_5785_axes_0, x = linear_90_cast_fp16)[name = string("op_5785_cast_fp16")]; tensor var_5787_axes_0 = const()[name = string("op_5787_axes_0"), val = tensor([0])]; tensor var_5787_cast_fp16 = squeeze(axes = var_5787_axes_0, x = var_5785_cast_fp16)[name = string("op_5787_cast_fp16")]; tensor var_5789_axes_0 = const()[name = string("op_5789_axes_0"), val = tensor([-1])]; tensor var_5789_cast_fp16 = expand_dims(axes = var_5789_axes_0, x = var_5787_cast_fp16)[name = string("op_5789_cast_fp16")]; tensor h_25_axes_0 = const()[name = string("h_25_axes_0"), val = tensor([-1])]; tensor h_25_cast_fp16 = expand_dims(axes = h_25_axes_0, x = var_5789_cast_fp16)[name = string("h_25_cast_fp16")]; tensor hidden_51_cast_fp16 = add(x = hidden_49_cast_fp16, y = h_25_cast_fp16)[name = string("hidden_51_cast_fp16")]; tensor var_5803_begin_0 = const()[name = string("op_5803_begin_0"), val = tensor([0, 13312, 0, 0])]; tensor var_5803_end_0 = const()[name = string("op_5803_end_0"), val = tensor([1, 14336, 1, 256])]; tensor var_5803_end_mask_0 = const()[name = string("op_5803_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5803_cast_fp16 = slice_by_index(begin = var_5803_begin_0, end = var_5803_end_0, end_mask = var_5803_end_mask_0, x = key_cache)[name = string("op_5803_cast_fp16")]; tensor var_5823_begin_0 = const()[name = string("op_5823_begin_0"), val = tensor([0, 13312, 0, 0])]; tensor var_5823_end_0 = const()[name = string("op_5823_end_0"), val = tensor([1, 14336, 1, 256])]; tensor var_5823_end_mask_0 = const()[name = string("op_5823_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5823_cast_fp16 = slice_by_index(begin = var_5823_begin_0, end = var_5823_end_0, end_mask = var_5823_end_mask_0, x = value_cache)[name = string("op_5823_cast_fp16")]; tensor var_5835_axes_0 = const()[name = string("op_5835_axes_0"), val = tensor([-1])]; tensor var_5835_cast_fp16 = squeeze(axes = var_5835_axes_0, x = hidden_51_cast_fp16)[name = string("op_5835_cast_fp16")]; tensor var_5837_axes_0 = const()[name = string("op_5837_axes_0"), val = tensor([-1])]; tensor var_5837_cast_fp16 = squeeze(axes = var_5837_axes_0, x = var_5835_cast_fp16)[name = string("op_5837_cast_fp16")]; tensor hidden_states_209_axes_0 = const()[name = string("hidden_states_209_axes_0"), val = tensor([0])]; tensor hidden_states_209_cast_fp16 = expand_dims(axes = hidden_states_209_axes_0, x = var_5837_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; fp16 var_5843_promoted_to_fp16 = const()[name = string("op_5843_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5849_cast_fp16 = pow(x = hidden_states_209_cast_fp16, y = var_5843_promoted_to_fp16)[name = string("op_5849_cast_fp16")]; tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_5849_cast_fp16)[name = string("variance_105_cast_fp16")]; tensor const_131_to_fp16 = const()[name = string("const_131_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204606528)))]; tensor var_5853_cast_fp16 = mul(x = const_131_to_fp16, y = hidden_states_209_cast_fp16)[name = string("op_5853_cast_fp16")]; fp16 var_5854_to_fp16 = const()[name = string("op_5854_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5855_cast_fp16 = add(x = variance_105_cast_fp16, y = var_5854_to_fp16)[name = string("op_5855_cast_fp16")]; fp32 var_5856_epsilon_0 = const()[name = string("op_5856_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5856_cast_fp16 = rsqrt(epsilon = var_5856_epsilon_0, x = var_5855_cast_fp16)[name = string("op_5856_cast_fp16")]; tensor input_131_cast_fp16 = mul(x = var_5853_cast_fp16, y = var_5856_cast_fp16)[name = string("input_131_cast_fp16")]; tensor layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204608640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206705856))))[name = string("layers_13_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor var_5865 = const()[name = string("op_5865"), val = tensor([1, 1, 16, 128])]; tensor var_5866_cast_fp16 = reshape(shape = var_5865, x = linear_91_cast_fp16)[name = string("op_5866_cast_fp16")]; tensor layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206706432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755072))))[name = string("layers_13_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor var_5877 = const()[name = string("op_5877"), val = tensor([1, 1, 8, 128])]; tensor var_5878_cast_fp16 = reshape(shape = var_5877, x = linear_92_cast_fp16)[name = string("op_5878_cast_fp16")]; tensor layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207755648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804288))))[name = string("layers_13_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("linear_93_cast_fp16")]; fp16 var_5897_promoted_to_fp16 = const()[name = string("op_5897_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5903_cast_fp16 = pow(x = var_5866_cast_fp16, y = var_5897_promoted_to_fp16)[name = string("op_5903_cast_fp16")]; bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; tensor const_334 = const()[name = string("const_334"), val = tensor([3])]; tensor variance_107_cast_fp16 = reduce_mean(axes = const_334, keep_dims = variance_107_keep_dims_0, x = var_5903_cast_fp16)[name = string("variance_107_cast_fp16")]; tensor const_335_to_fp16 = const()[name = string("const_335_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208804864)))]; tensor var_5907_cast_fp16 = mul(x = const_335_to_fp16, y = var_5866_cast_fp16)[name = string("op_5907_cast_fp16")]; fp16 var_5908_to_fp16 = const()[name = string("op_5908_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5909_cast_fp16 = add(x = variance_107_cast_fp16, y = var_5908_to_fp16)[name = string("op_5909_cast_fp16")]; fp32 var_5910_epsilon_0 = const()[name = string("op_5910_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5910_cast_fp16 = rsqrt(epsilon = var_5910_epsilon_0, x = var_5909_cast_fp16)[name = string("op_5910_cast_fp16")]; tensor q_79_cast_fp16 = mul(x = var_5907_cast_fp16, y = var_5910_cast_fp16)[name = string("q_79_cast_fp16")]; fp16 var_5915_promoted_to_fp16 = const()[name = string("op_5915_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_5921_cast_fp16 = pow(x = var_5878_cast_fp16, y = var_5915_promoted_to_fp16)[name = string("op_5921_cast_fp16")]; bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; tensor const_336 = const()[name = string("const_336"), val = tensor([3])]; tensor variance_109_cast_fp16 = reduce_mean(axes = const_336, keep_dims = variance_109_keep_dims_0, x = var_5921_cast_fp16)[name = string("variance_109_cast_fp16")]; tensor const_337_to_fp16 = const()[name = string("const_337_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805184)))]; tensor var_5925_cast_fp16 = mul(x = const_337_to_fp16, y = var_5878_cast_fp16)[name = string("op_5925_cast_fp16")]; fp16 var_5926_to_fp16 = const()[name = string("op_5926_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5927_cast_fp16 = add(x = variance_109_cast_fp16, y = var_5926_to_fp16)[name = string("op_5927_cast_fp16")]; fp32 var_5928_epsilon_0 = const()[name = string("op_5928_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5928_cast_fp16 = rsqrt(epsilon = var_5928_epsilon_0, x = var_5927_cast_fp16)[name = string("op_5928_cast_fp16")]; tensor k_79_cast_fp16 = mul(x = var_5925_cast_fp16, y = var_5928_cast_fp16)[name = string("k_79_cast_fp16")]; tensor var_5943_cast_fp16 = mul(x = q_79_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5943_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_79_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_79_cast_fp16)[name = string("x2_53_cast_fp16")]; fp16 const_136_promoted_to_fp16 = const()[name = string("const_136_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5964_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_136_promoted_to_fp16)[name = string("op_5964_cast_fp16")]; int32 var_5966 = const()[name = string("op_5966"), val = int32(-1)]; bool var_5967_interleave_0 = const()[name = string("op_5967_interleave_0"), val = bool(false)]; tensor var_5967_cast_fp16 = concat(axis = var_5966, interleave = var_5967_interleave_0, values = (var_5964_cast_fp16, x1_53_cast_fp16))[name = string("op_5967_cast_fp16")]; tensor var_5968_cast_fp16 = mul(x = var_5967_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5968_cast_fp16")]; tensor q_83_cast_fp16 = add(x = var_5943_cast_fp16, y = var_5968_cast_fp16)[name = string("q_83_cast_fp16")]; tensor var_5971_cast_fp16 = mul(x = k_79_cast_fp16, y = cos_1_cast_fp16)[name = string("op_5971_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_79_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_79_cast_fp16)[name = string("x2_55_cast_fp16")]; fp16 const_139_promoted_to_fp16 = const()[name = string("const_139_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5992_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_139_promoted_to_fp16)[name = string("op_5992_cast_fp16")]; int32 var_5994 = const()[name = string("op_5994"), val = int32(-1)]; bool var_5995_interleave_0 = const()[name = string("op_5995_interleave_0"), val = bool(false)]; tensor var_5995_cast_fp16 = concat(axis = var_5994, interleave = var_5995_interleave_0, values = (var_5992_cast_fp16, x1_55_cast_fp16))[name = string("op_5995_cast_fp16")]; tensor var_5996_cast_fp16 = mul(x = var_5995_cast_fp16, y = sin_1_cast_fp16)[name = string("op_5996_cast_fp16")]; tensor k_83_cast_fp16 = add(x = var_5971_cast_fp16, y = var_5996_cast_fp16)[name = string("k_83_cast_fp16")]; tensor var_6003 = const()[name = string("op_6003"), val = tensor([1, 1024, 1, 1])]; tensor nk_27_cast_fp16 = reshape(shape = var_6003, x = k_83_cast_fp16)[name = string("nk_27_cast_fp16")]; tensor var_6009 = const()[name = string("op_6009"), val = tensor([1, 1024, 1, 1])]; tensor nv_27_cast_fp16 = reshape(shape = var_6009, x = linear_93_cast_fp16)[name = string("nv_27_cast_fp16")]; tensor var_6014_cast_fp16 = mul(x = var_5803_cast_fp16, y = var_1203_cast_fp16)[name = string("op_6014_cast_fp16")]; tensor var_6015_cast_fp16 = mul(x = nk_27_cast_fp16, y = update_mask_cast_fp16)[name = string("op_6015_cast_fp16")]; tensor lkc_55_cast_fp16 = add(x = var_6014_cast_fp16, y = var_6015_cast_fp16)[name = string("lkc_55_cast_fp16")]; tensor var_6021_cast_fp16 = mul(x = var_5823_cast_fp16, y = var_1203_cast_fp16)[name = string("op_6021_cast_fp16")]; tensor var_6022_cast_fp16 = mul(x = nv_27_cast_fp16, y = update_mask_cast_fp16)[name = string("op_6022_cast_fp16")]; tensor lvc_55_cast_fp16 = add(x = var_6021_cast_fp16, y = var_6022_cast_fp16)[name = string("lvc_55_cast_fp16")]; tensor var_6026_axes_0 = const()[name = string("op_6026_axes_0"), val = tensor([2])]; tensor var_6026_cast_fp16 = squeeze(axes = var_6026_axes_0, x = lkc_55_cast_fp16)[name = string("op_6026_cast_fp16")]; tensor var_6031 = const()[name = string("op_6031"), val = tensor([1, 8, 128, 256])]; tensor kc_53_cast_fp16 = reshape(shape = var_6031, x = var_6026_cast_fp16)[name = string("kc_53_cast_fp16")]; tensor var_6034_axes_0 = const()[name = string("op_6034_axes_0"), val = tensor([2])]; tensor var_6034_cast_fp16 = squeeze(axes = var_6034_axes_0, x = lvc_55_cast_fp16)[name = string("op_6034_cast_fp16")]; tensor var_6039 = const()[name = string("op_6039"), val = tensor([1, 8, 128, 256])]; tensor vc_53_cast_fp16 = reshape(shape = var_6039, x = var_6034_cast_fp16)[name = string("vc_53_cast_fp16")]; tensor var_6042_axes_0 = const()[name = string("op_6042_axes_0"), val = tensor([2])]; tensor var_6042_cast_fp16 = expand_dims(axes = var_6042_axes_0, x = kc_53_cast_fp16)[name = string("op_6042_cast_fp16")]; tensor var_6050_reps_0 = const()[name = string("op_6050_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6050_cast_fp16 = tile(reps = var_6050_reps_0, x = var_6042_cast_fp16)[name = string("op_6050_cast_fp16")]; tensor var_6055 = const()[name = string("op_6055"), val = tensor([1, 16, 128, 256])]; tensor kc_55_cast_fp16 = reshape(shape = var_6055, x = var_6050_cast_fp16)[name = string("kc_55_cast_fp16")]; tensor var_6058_axes_0 = const()[name = string("op_6058_axes_0"), val = tensor([2])]; tensor var_6058_cast_fp16 = expand_dims(axes = var_6058_axes_0, x = vc_53_cast_fp16)[name = string("op_6058_cast_fp16")]; tensor var_6066_reps_0 = const()[name = string("op_6066_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6066_cast_fp16 = tile(reps = var_6066_reps_0, x = var_6058_cast_fp16)[name = string("op_6066_cast_fp16")]; tensor var_6071 = const()[name = string("op_6071"), val = tensor([1, 16, 128, 256])]; tensor vc_55_cast_fp16 = reshape(shape = var_6071, x = var_6066_cast_fp16)[name = string("vc_55_cast_fp16")]; tensor var_6075_perm_0 = const()[name = string("op_6075_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_6076_transpose_x_0 = const()[name = string("op_6076_transpose_x_0"), val = bool(false)]; bool var_6076_transpose_y_0 = const()[name = string("op_6076_transpose_y_0"), val = bool(false)]; tensor var_6075_cast_fp16 = transpose(perm = var_6075_perm_0, x = q_83_cast_fp16)[name = string("transpose_29")]; tensor var_6076_cast_fp16 = matmul(transpose_x = var_6076_transpose_x_0, transpose_y = var_6076_transpose_y_0, x = var_6075_cast_fp16, y = kc_55_cast_fp16)[name = string("op_6076_cast_fp16")]; fp16 _inversed_aw_105_y_0_to_fp16 = const()[name = string("_inversed_aw_105_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_105_cast_fp16 = mul(x = var_6076_cast_fp16, y = _inversed_aw_105_y_0_to_fp16)[name = string("_inversed_aw_105_cast_fp16")]; tensor aw_107_cast_fp16 = add(x = _inversed_aw_105_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_107_cast_fp16")]; int32 var_6090 = const()[name = string("op_6090"), val = int32(-1)]; tensor aw_111_cast_fp16 = softmax(axis = var_6090, x = aw_107_cast_fp16)[name = string("aw_111_cast_fp16")]; bool var_6096_transpose_x_1 = const()[name = string("op_6096_transpose_x_1"), val = bool(false)]; bool var_6096_transpose_y_1 = const()[name = string("op_6096_transpose_y_1"), val = bool(true)]; tensor var_6096_cast_fp16 = matmul(transpose_x = var_6096_transpose_x_1, transpose_y = var_6096_transpose_y_1, x = aw_111_cast_fp16, y = vc_55_cast_fp16)[name = string("op_6096_cast_fp16")]; tensor var_6099_perm_0 = const()[name = string("op_6099_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6103 = const()[name = string("op_6103"), val = tensor([1, 1, -1])]; tensor var_6099_cast_fp16 = transpose(perm = var_6099_perm_0, x = var_6096_cast_fp16)[name = string("transpose_28")]; tensor input_133_cast_fp16 = reshape(shape = var_6103, x = var_6099_cast_fp16)[name = string("input_133_cast_fp16")]; tensor layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208805504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210902720))))[name = string("layers_13_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor var_6109_axes_0 = const()[name = string("op_6109_axes_0"), val = tensor([0])]; tensor var_6109_cast_fp16 = squeeze(axes = var_6109_axes_0, x = linear_94_cast_fp16)[name = string("op_6109_cast_fp16")]; tensor var_6111_axes_0 = const()[name = string("op_6111_axes_0"), val = tensor([0])]; tensor var_6111_cast_fp16 = squeeze(axes = var_6111_axes_0, x = var_6109_cast_fp16)[name = string("op_6111_cast_fp16")]; tensor var_6113_axes_0 = const()[name = string("op_6113_axes_0"), val = tensor([-1])]; tensor var_6113_cast_fp16 = expand_dims(axes = var_6113_axes_0, x = var_6111_cast_fp16)[name = string("op_6113_cast_fp16")]; tensor ao_27_axes_0 = const()[name = string("ao_27_axes_0"), val = tensor([-1])]; tensor ao_27_cast_fp16 = expand_dims(axes = ao_27_axes_0, x = var_6113_cast_fp16)[name = string("ao_27_cast_fp16")]; tensor hidden_53_cast_fp16 = add(x = hidden_51_cast_fp16, y = ao_27_cast_fp16)[name = string("hidden_53_cast_fp16")]; tensor var_6119_axes_0 = const()[name = string("op_6119_axes_0"), val = tensor([-1])]; tensor var_6119_cast_fp16 = squeeze(axes = var_6119_axes_0, x = hidden_53_cast_fp16)[name = string("op_6119_cast_fp16")]; tensor var_6121_axes_0 = const()[name = string("op_6121_axes_0"), val = tensor([-1])]; tensor var_6121_cast_fp16 = squeeze(axes = var_6121_axes_0, x = var_6119_cast_fp16)[name = string("op_6121_cast_fp16")]; tensor hidden_states_221_axes_0 = const()[name = string("hidden_states_221_axes_0"), val = tensor([0])]; tensor hidden_states_221_cast_fp16 = expand_dims(axes = hidden_states_221_axes_0, x = var_6121_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; fp16 var_6127_promoted_to_fp16 = const()[name = string("op_6127_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6133_cast_fp16 = pow(x = hidden_states_221_cast_fp16, y = var_6127_promoted_to_fp16)[name = string("op_6133_cast_fp16")]; tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_6133_cast_fp16)[name = string("variance_111_cast_fp16")]; tensor const_140_to_fp16 = const()[name = string("const_140_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210903296)))]; tensor var_6137_cast_fp16 = mul(x = const_140_to_fp16, y = hidden_states_221_cast_fp16)[name = string("op_6137_cast_fp16")]; fp16 var_6138_to_fp16 = const()[name = string("op_6138_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6139_cast_fp16 = add(x = variance_111_cast_fp16, y = var_6138_to_fp16)[name = string("op_6139_cast_fp16")]; fp32 var_6140_epsilon_0 = const()[name = string("op_6140_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6140_cast_fp16 = rsqrt(epsilon = var_6140_epsilon_0, x = var_6139_cast_fp16)[name = string("op_6140_cast_fp16")]; tensor input_135_cast_fp16 = mul(x = var_6137_cast_fp16, y = var_6140_cast_fp16)[name = string("input_135_cast_fp16")]; tensor layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210905408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051200))))[name = string("layers_13_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_6148_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_6148_cast_fp16")]; tensor layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214051776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217197568))))[name = string("layers_13_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_13_mlp_up_proj_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_139_cast_fp16 = mul(x = var_6148_cast_fp16, y = linear_96_cast_fp16)[name = string("input_139_cast_fp16")]; tensor layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217198144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220343936))))[name = string("layers_13_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor var_6155_axes_0 = const()[name = string("op_6155_axes_0"), val = tensor([0])]; tensor var_6155_cast_fp16 = squeeze(axes = var_6155_axes_0, x = linear_97_cast_fp16)[name = string("op_6155_cast_fp16")]; tensor var_6157_axes_0 = const()[name = string("op_6157_axes_0"), val = tensor([0])]; tensor var_6157_cast_fp16 = squeeze(axes = var_6157_axes_0, x = var_6155_cast_fp16)[name = string("op_6157_cast_fp16")]; tensor var_6159_axes_0 = const()[name = string("op_6159_axes_0"), val = tensor([-1])]; tensor var_6159_cast_fp16 = expand_dims(axes = var_6159_axes_0, x = var_6157_cast_fp16)[name = string("op_6159_cast_fp16")]; tensor h_27_axes_0 = const()[name = string("h_27_axes_0"), val = tensor([-1])]; tensor h_27_cast_fp16 = expand_dims(axes = h_27_axes_0, x = var_6159_cast_fp16)[name = string("h_27_cast_fp16")]; tensor hidden_55_cast_fp16 = add(x = hidden_53_cast_fp16, y = h_27_cast_fp16)[name = string("hidden_55_cast_fp16")]; tensor var_6173_begin_0 = const()[name = string("op_6173_begin_0"), val = tensor([0, 14336, 0, 0])]; tensor var_6173_end_0 = const()[name = string("op_6173_end_0"), val = tensor([1, 15360, 1, 256])]; tensor var_6173_end_mask_0 = const()[name = string("op_6173_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6173_cast_fp16 = slice_by_index(begin = var_6173_begin_0, end = var_6173_end_0, end_mask = var_6173_end_mask_0, x = key_cache)[name = string("op_6173_cast_fp16")]; tensor var_6193_begin_0 = const()[name = string("op_6193_begin_0"), val = tensor([0, 14336, 0, 0])]; tensor var_6193_end_0 = const()[name = string("op_6193_end_0"), val = tensor([1, 15360, 1, 256])]; tensor var_6193_end_mask_0 = const()[name = string("op_6193_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6193_cast_fp16 = slice_by_index(begin = var_6193_begin_0, end = var_6193_end_0, end_mask = var_6193_end_mask_0, x = value_cache)[name = string("op_6193_cast_fp16")]; tensor var_6205_axes_0 = const()[name = string("op_6205_axes_0"), val = tensor([-1])]; tensor var_6205_cast_fp16 = squeeze(axes = var_6205_axes_0, x = hidden_55_cast_fp16)[name = string("op_6205_cast_fp16")]; tensor var_6207_axes_0 = const()[name = string("op_6207_axes_0"), val = tensor([-1])]; tensor var_6207_cast_fp16 = squeeze(axes = var_6207_axes_0, x = var_6205_cast_fp16)[name = string("op_6207_cast_fp16")]; tensor hidden_states_225_axes_0 = const()[name = string("hidden_states_225_axes_0"), val = tensor([0])]; tensor hidden_states_225_cast_fp16 = expand_dims(axes = hidden_states_225_axes_0, x = var_6207_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; fp16 var_6213_promoted_to_fp16 = const()[name = string("op_6213_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6219_cast_fp16 = pow(x = hidden_states_225_cast_fp16, y = var_6213_promoted_to_fp16)[name = string("op_6219_cast_fp16")]; tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_6219_cast_fp16)[name = string("variance_113_cast_fp16")]; tensor const_141_to_fp16 = const()[name = string("const_141_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220344512)))]; tensor var_6223_cast_fp16 = mul(x = const_141_to_fp16, y = hidden_states_225_cast_fp16)[name = string("op_6223_cast_fp16")]; fp16 var_6224_to_fp16 = const()[name = string("op_6224_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6225_cast_fp16 = add(x = variance_113_cast_fp16, y = var_6224_to_fp16)[name = string("op_6225_cast_fp16")]; fp32 var_6226_epsilon_0 = const()[name = string("op_6226_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6226_cast_fp16 = rsqrt(epsilon = var_6226_epsilon_0, x = var_6225_cast_fp16)[name = string("op_6226_cast_fp16")]; tensor input_141_cast_fp16 = mul(x = var_6223_cast_fp16, y = var_6226_cast_fp16)[name = string("input_141_cast_fp16")]; tensor layers_14_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220346624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222443840))))[name = string("layers_14_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_14_self_attn_q_proj_weight_to_fp16_palettized, x = input_141_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor var_6235 = const()[name = string("op_6235"), val = tensor([1, 1, 16, 128])]; tensor var_6236_cast_fp16 = reshape(shape = var_6235, x = linear_98_cast_fp16)[name = string("op_6236_cast_fp16")]; tensor layers_14_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222444416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223493056))))[name = string("layers_14_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_k_proj_weight_to_fp16_palettized, x = input_141_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor var_6247 = const()[name = string("op_6247"), val = tensor([1, 1, 8, 128])]; tensor var_6248_cast_fp16 = reshape(shape = var_6247, x = linear_99_cast_fp16)[name = string("op_6248_cast_fp16")]; tensor layers_14_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223493632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224542272))))[name = string("layers_14_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_v_proj_weight_to_fp16_palettized, x = input_141_cast_fp16)[name = string("linear_100_cast_fp16")]; fp16 var_6267_promoted_to_fp16 = const()[name = string("op_6267_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6273_cast_fp16 = pow(x = var_6236_cast_fp16, y = var_6267_promoted_to_fp16)[name = string("op_6273_cast_fp16")]; bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; tensor const_338 = const()[name = string("const_338"), val = tensor([3])]; tensor variance_115_cast_fp16 = reduce_mean(axes = const_338, keep_dims = variance_115_keep_dims_0, x = var_6273_cast_fp16)[name = string("variance_115_cast_fp16")]; tensor const_339_to_fp16 = const()[name = string("const_339_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224542848)))]; tensor var_6277_cast_fp16 = mul(x = const_339_to_fp16, y = var_6236_cast_fp16)[name = string("op_6277_cast_fp16")]; fp16 var_6278_to_fp16 = const()[name = string("op_6278_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6279_cast_fp16 = add(x = variance_115_cast_fp16, y = var_6278_to_fp16)[name = string("op_6279_cast_fp16")]; fp32 var_6280_epsilon_0 = const()[name = string("op_6280_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6280_cast_fp16 = rsqrt(epsilon = var_6280_epsilon_0, x = var_6279_cast_fp16)[name = string("op_6280_cast_fp16")]; tensor q_85_cast_fp16 = mul(x = var_6277_cast_fp16, y = var_6280_cast_fp16)[name = string("q_85_cast_fp16")]; fp16 var_6285_promoted_to_fp16 = const()[name = string("op_6285_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6291_cast_fp16 = pow(x = var_6248_cast_fp16, y = var_6285_promoted_to_fp16)[name = string("op_6291_cast_fp16")]; bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; tensor const_340 = const()[name = string("const_340"), val = tensor([3])]; tensor variance_117_cast_fp16 = reduce_mean(axes = const_340, keep_dims = variance_117_keep_dims_0, x = var_6291_cast_fp16)[name = string("variance_117_cast_fp16")]; tensor const_341_to_fp16 = const()[name = string("const_341_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224543168)))]; tensor var_6295_cast_fp16 = mul(x = const_341_to_fp16, y = var_6248_cast_fp16)[name = string("op_6295_cast_fp16")]; fp16 var_6296_to_fp16 = const()[name = string("op_6296_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6297_cast_fp16 = add(x = variance_117_cast_fp16, y = var_6296_to_fp16)[name = string("op_6297_cast_fp16")]; fp32 var_6298_epsilon_0 = const()[name = string("op_6298_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6298_cast_fp16 = rsqrt(epsilon = var_6298_epsilon_0, x = var_6297_cast_fp16)[name = string("op_6298_cast_fp16")]; tensor k_85_cast_fp16 = mul(x = var_6295_cast_fp16, y = var_6298_cast_fp16)[name = string("k_85_cast_fp16")]; tensor var_6313_cast_fp16 = mul(x = q_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6313_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_85_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_85_cast_fp16)[name = string("x2_57_cast_fp16")]; fp16 const_146_promoted_to_fp16 = const()[name = string("const_146_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6334_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_146_promoted_to_fp16)[name = string("op_6334_cast_fp16")]; int32 var_6336 = const()[name = string("op_6336"), val = int32(-1)]; bool var_6337_interleave_0 = const()[name = string("op_6337_interleave_0"), val = bool(false)]; tensor var_6337_cast_fp16 = concat(axis = var_6336, interleave = var_6337_interleave_0, values = (var_6334_cast_fp16, x1_57_cast_fp16))[name = string("op_6337_cast_fp16")]; tensor var_6338_cast_fp16 = mul(x = var_6337_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6338_cast_fp16")]; tensor q_89_cast_fp16 = add(x = var_6313_cast_fp16, y = var_6338_cast_fp16)[name = string("q_89_cast_fp16")]; tensor var_6341_cast_fp16 = mul(x = k_85_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6341_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_85_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_85_cast_fp16)[name = string("x2_59_cast_fp16")]; fp16 const_149_promoted_to_fp16 = const()[name = string("const_149_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6362_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_149_promoted_to_fp16)[name = string("op_6362_cast_fp16")]; int32 var_6364 = const()[name = string("op_6364"), val = int32(-1)]; bool var_6365_interleave_0 = const()[name = string("op_6365_interleave_0"), val = bool(false)]; tensor var_6365_cast_fp16 = concat(axis = var_6364, interleave = var_6365_interleave_0, values = (var_6362_cast_fp16, x1_59_cast_fp16))[name = string("op_6365_cast_fp16")]; tensor var_6366_cast_fp16 = mul(x = var_6365_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6366_cast_fp16")]; tensor k_89_cast_fp16 = add(x = var_6341_cast_fp16, y = var_6366_cast_fp16)[name = string("k_89_cast_fp16")]; tensor var_6373 = const()[name = string("op_6373"), val = tensor([1, 1024, 1, 1])]; tensor nk_29_cast_fp16 = reshape(shape = var_6373, x = k_89_cast_fp16)[name = string("nk_29_cast_fp16")]; tensor var_6379 = const()[name = string("op_6379"), val = tensor([1, 1024, 1, 1])]; tensor nv_29_cast_fp16 = reshape(shape = var_6379, x = linear_100_cast_fp16)[name = string("nv_29_cast_fp16")]; tensor var_6384_cast_fp16 = mul(x = var_6173_cast_fp16, y = var_1203_cast_fp16)[name = string("op_6384_cast_fp16")]; tensor var_6385_cast_fp16 = mul(x = nk_29_cast_fp16, y = update_mask_cast_fp16)[name = string("op_6385_cast_fp16")]; tensor lkc_59_cast_fp16 = add(x = var_6384_cast_fp16, y = var_6385_cast_fp16)[name = string("lkc_59_cast_fp16")]; tensor var_6391_cast_fp16 = mul(x = var_6193_cast_fp16, y = var_1203_cast_fp16)[name = string("op_6391_cast_fp16")]; tensor var_6392_cast_fp16 = mul(x = nv_29_cast_fp16, y = update_mask_cast_fp16)[name = string("op_6392_cast_fp16")]; tensor lvc_59_cast_fp16 = add(x = var_6391_cast_fp16, y = var_6392_cast_fp16)[name = string("lvc_59_cast_fp16")]; tensor var_6396_axes_0 = const()[name = string("op_6396_axes_0"), val = tensor([2])]; tensor var_6396_cast_fp16 = squeeze(axes = var_6396_axes_0, x = lkc_59_cast_fp16)[name = string("op_6396_cast_fp16")]; tensor var_6401 = const()[name = string("op_6401"), val = tensor([1, 8, 128, 256])]; tensor kc_57_cast_fp16 = reshape(shape = var_6401, x = var_6396_cast_fp16)[name = string("kc_57_cast_fp16")]; tensor var_6404_axes_0 = const()[name = string("op_6404_axes_0"), val = tensor([2])]; tensor var_6404_cast_fp16 = squeeze(axes = var_6404_axes_0, x = lvc_59_cast_fp16)[name = string("op_6404_cast_fp16")]; tensor var_6409 = const()[name = string("op_6409"), val = tensor([1, 8, 128, 256])]; tensor vc_57_cast_fp16 = reshape(shape = var_6409, x = var_6404_cast_fp16)[name = string("vc_57_cast_fp16")]; tensor var_6412_axes_0 = const()[name = string("op_6412_axes_0"), val = tensor([2])]; tensor var_6412_cast_fp16 = expand_dims(axes = var_6412_axes_0, x = kc_57_cast_fp16)[name = string("op_6412_cast_fp16")]; tensor var_6420_reps_0 = const()[name = string("op_6420_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6420_cast_fp16 = tile(reps = var_6420_reps_0, x = var_6412_cast_fp16)[name = string("op_6420_cast_fp16")]; tensor var_6425 = const()[name = string("op_6425"), val = tensor([1, 16, 128, 256])]; tensor kc_59_cast_fp16 = reshape(shape = var_6425, x = var_6420_cast_fp16)[name = string("kc_59_cast_fp16")]; tensor var_6428_axes_0 = const()[name = string("op_6428_axes_0"), val = tensor([2])]; tensor var_6428_cast_fp16 = expand_dims(axes = var_6428_axes_0, x = vc_57_cast_fp16)[name = string("op_6428_cast_fp16")]; tensor var_6436_reps_0 = const()[name = string("op_6436_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6436_cast_fp16 = tile(reps = var_6436_reps_0, x = var_6428_cast_fp16)[name = string("op_6436_cast_fp16")]; tensor var_6441 = const()[name = string("op_6441"), val = tensor([1, 16, 128, 256])]; tensor vc_59_cast_fp16 = reshape(shape = var_6441, x = var_6436_cast_fp16)[name = string("vc_59_cast_fp16")]; tensor var_6445_perm_0 = const()[name = string("op_6445_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_6446_transpose_x_0 = const()[name = string("op_6446_transpose_x_0"), val = bool(false)]; bool var_6446_transpose_y_0 = const()[name = string("op_6446_transpose_y_0"), val = bool(false)]; tensor var_6445_cast_fp16 = transpose(perm = var_6445_perm_0, x = q_89_cast_fp16)[name = string("transpose_27")]; tensor var_6446_cast_fp16 = matmul(transpose_x = var_6446_transpose_x_0, transpose_y = var_6446_transpose_y_0, x = var_6445_cast_fp16, y = kc_59_cast_fp16)[name = string("op_6446_cast_fp16")]; fp16 _inversed_aw_113_y_0_to_fp16 = const()[name = string("_inversed_aw_113_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_113_cast_fp16 = mul(x = var_6446_cast_fp16, y = _inversed_aw_113_y_0_to_fp16)[name = string("_inversed_aw_113_cast_fp16")]; tensor aw_115_cast_fp16 = add(x = _inversed_aw_113_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_115_cast_fp16")]; int32 var_6460 = const()[name = string("op_6460"), val = int32(-1)]; tensor aw_119_cast_fp16 = softmax(axis = var_6460, x = aw_115_cast_fp16)[name = string("aw_119_cast_fp16")]; bool var_6466_transpose_x_1 = const()[name = string("op_6466_transpose_x_1"), val = bool(false)]; bool var_6466_transpose_y_1 = const()[name = string("op_6466_transpose_y_1"), val = bool(true)]; tensor var_6466_cast_fp16 = matmul(transpose_x = var_6466_transpose_x_1, transpose_y = var_6466_transpose_y_1, x = aw_119_cast_fp16, y = vc_59_cast_fp16)[name = string("op_6466_cast_fp16")]; tensor var_6469_perm_0 = const()[name = string("op_6469_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6473 = const()[name = string("op_6473"), val = tensor([1, 1, -1])]; tensor var_6469_cast_fp16 = transpose(perm = var_6469_perm_0, x = var_6466_cast_fp16)[name = string("transpose_26")]; tensor input_143_cast_fp16 = reshape(shape = var_6473, x = var_6469_cast_fp16)[name = string("input_143_cast_fp16")]; tensor layers_14_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224543488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226640704))))[name = string("layers_14_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_101_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_self_attn_o_proj_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor var_6479_axes_0 = const()[name = string("op_6479_axes_0"), val = tensor([0])]; tensor var_6479_cast_fp16 = squeeze(axes = var_6479_axes_0, x = linear_101_cast_fp16)[name = string("op_6479_cast_fp16")]; tensor var_6481_axes_0 = const()[name = string("op_6481_axes_0"), val = tensor([0])]; tensor var_6481_cast_fp16 = squeeze(axes = var_6481_axes_0, x = var_6479_cast_fp16)[name = string("op_6481_cast_fp16")]; tensor var_6483_axes_0 = const()[name = string("op_6483_axes_0"), val = tensor([-1])]; tensor var_6483_cast_fp16 = expand_dims(axes = var_6483_axes_0, x = var_6481_cast_fp16)[name = string("op_6483_cast_fp16")]; tensor ao_29_axes_0 = const()[name = string("ao_29_axes_0"), val = tensor([-1])]; tensor ao_29_cast_fp16 = expand_dims(axes = ao_29_axes_0, x = var_6483_cast_fp16)[name = string("ao_29_cast_fp16")]; tensor hidden_57_cast_fp16 = add(x = hidden_55_cast_fp16, y = ao_29_cast_fp16)[name = string("hidden_57_cast_fp16")]; tensor var_6489_axes_0 = const()[name = string("op_6489_axes_0"), val = tensor([-1])]; tensor var_6489_cast_fp16 = squeeze(axes = var_6489_axes_0, x = hidden_57_cast_fp16)[name = string("op_6489_cast_fp16")]; tensor var_6491_axes_0 = const()[name = string("op_6491_axes_0"), val = tensor([-1])]; tensor var_6491_cast_fp16 = squeeze(axes = var_6491_axes_0, x = var_6489_cast_fp16)[name = string("op_6491_cast_fp16")]; tensor hidden_states_237_axes_0 = const()[name = string("hidden_states_237_axes_0"), val = tensor([0])]; tensor hidden_states_237_cast_fp16 = expand_dims(axes = hidden_states_237_axes_0, x = var_6491_cast_fp16)[name = string("hidden_states_237_cast_fp16")]; fp16 var_6497_promoted_to_fp16 = const()[name = string("op_6497_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6503_cast_fp16 = pow(x = hidden_states_237_cast_fp16, y = var_6497_promoted_to_fp16)[name = string("op_6503_cast_fp16")]; tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_6503_cast_fp16)[name = string("variance_119_cast_fp16")]; tensor const_150_to_fp16 = const()[name = string("const_150_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226641280)))]; tensor var_6507_cast_fp16 = mul(x = const_150_to_fp16, y = hidden_states_237_cast_fp16)[name = string("op_6507_cast_fp16")]; fp16 var_6508_to_fp16 = const()[name = string("op_6508_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6509_cast_fp16 = add(x = variance_119_cast_fp16, y = var_6508_to_fp16)[name = string("op_6509_cast_fp16")]; fp32 var_6510_epsilon_0 = const()[name = string("op_6510_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6510_cast_fp16 = rsqrt(epsilon = var_6510_epsilon_0, x = var_6509_cast_fp16)[name = string("op_6510_cast_fp16")]; tensor input_145_cast_fp16 = mul(x = var_6507_cast_fp16, y = var_6510_cast_fp16)[name = string("input_145_cast_fp16")]; tensor layers_14_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226643392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229789184))))[name = string("layers_14_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_gate_proj_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = string("linear_102_cast_fp16")]; tensor var_6518_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_6518_cast_fp16")]; tensor layers_14_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229789760))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232935552))))[name = string("layers_14_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_14_mlp_up_proj_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor input_149_cast_fp16 = mul(x = var_6518_cast_fp16, y = linear_103_cast_fp16)[name = string("input_149_cast_fp16")]; tensor layers_14_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232936128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236081920))))[name = string("layers_14_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_104_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_14_mlp_down_proj_weight_to_fp16_palettized, x = input_149_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor var_6525_axes_0 = const()[name = string("op_6525_axes_0"), val = tensor([0])]; tensor var_6525_cast_fp16 = squeeze(axes = var_6525_axes_0, x = linear_104_cast_fp16)[name = string("op_6525_cast_fp16")]; tensor var_6527_axes_0 = const()[name = string("op_6527_axes_0"), val = tensor([0])]; tensor var_6527_cast_fp16 = squeeze(axes = var_6527_axes_0, x = var_6525_cast_fp16)[name = string("op_6527_cast_fp16")]; tensor var_6529_axes_0 = const()[name = string("op_6529_axes_0"), val = tensor([-1])]; tensor var_6529_cast_fp16 = expand_dims(axes = var_6529_axes_0, x = var_6527_cast_fp16)[name = string("op_6529_cast_fp16")]; tensor h_29_axes_0 = const()[name = string("h_29_axes_0"), val = tensor([-1])]; tensor h_29_cast_fp16 = expand_dims(axes = h_29_axes_0, x = var_6529_cast_fp16)[name = string("h_29_cast_fp16")]; tensor hidden_59_cast_fp16 = add(x = hidden_57_cast_fp16, y = h_29_cast_fp16)[name = string("hidden_59_cast_fp16")]; tensor var_6543_begin_0 = const()[name = string("op_6543_begin_0"), val = tensor([0, 15360, 0, 0])]; tensor var_6543_end_0 = const()[name = string("op_6543_end_0"), val = tensor([1, 16384, 1, 256])]; tensor var_6543_end_mask_0 = const()[name = string("op_6543_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6543_cast_fp16 = slice_by_index(begin = var_6543_begin_0, end = var_6543_end_0, end_mask = var_6543_end_mask_0, x = key_cache)[name = string("op_6543_cast_fp16")]; tensor var_6563_begin_0 = const()[name = string("op_6563_begin_0"), val = tensor([0, 15360, 0, 0])]; tensor var_6563_end_0 = const()[name = string("op_6563_end_0"), val = tensor([1, 16384, 1, 256])]; tensor var_6563_end_mask_0 = const()[name = string("op_6563_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6563_cast_fp16 = slice_by_index(begin = var_6563_begin_0, end = var_6563_end_0, end_mask = var_6563_end_mask_0, x = value_cache)[name = string("op_6563_cast_fp16")]; tensor var_6575_axes_0 = const()[name = string("op_6575_axes_0"), val = tensor([-1])]; tensor var_6575_cast_fp16 = squeeze(axes = var_6575_axes_0, x = hidden_59_cast_fp16)[name = string("op_6575_cast_fp16")]; tensor var_6577_axes_0 = const()[name = string("op_6577_axes_0"), val = tensor([-1])]; tensor var_6577_cast_fp16 = squeeze(axes = var_6577_axes_0, x = var_6575_cast_fp16)[name = string("op_6577_cast_fp16")]; tensor hidden_states_241_axes_0 = const()[name = string("hidden_states_241_axes_0"), val = tensor([0])]; tensor hidden_states_241_cast_fp16 = expand_dims(axes = hidden_states_241_axes_0, x = var_6577_cast_fp16)[name = string("hidden_states_241_cast_fp16")]; fp16 var_6583_promoted_to_fp16 = const()[name = string("op_6583_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6589_cast_fp16 = pow(x = hidden_states_241_cast_fp16, y = var_6583_promoted_to_fp16)[name = string("op_6589_cast_fp16")]; tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([-1])]; bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; tensor variance_121_cast_fp16 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = var_6589_cast_fp16)[name = string("variance_121_cast_fp16")]; tensor const_151_to_fp16 = const()[name = string("const_151_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236082496)))]; tensor var_6593_cast_fp16 = mul(x = const_151_to_fp16, y = hidden_states_241_cast_fp16)[name = string("op_6593_cast_fp16")]; fp16 var_6594_to_fp16 = const()[name = string("op_6594_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6595_cast_fp16 = add(x = variance_121_cast_fp16, y = var_6594_to_fp16)[name = string("op_6595_cast_fp16")]; fp32 var_6596_epsilon_0 = const()[name = string("op_6596_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6596_cast_fp16 = rsqrt(epsilon = var_6596_epsilon_0, x = var_6595_cast_fp16)[name = string("op_6596_cast_fp16")]; tensor input_151_cast_fp16 = mul(x = var_6593_cast_fp16, y = var_6596_cast_fp16)[name = string("input_151_cast_fp16")]; tensor layers_15_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236084608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238181824))))[name = string("layers_15_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_15_self_attn_q_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor var_6605 = const()[name = string("op_6605"), val = tensor([1, 1, 16, 128])]; tensor var_6606_cast_fp16 = reshape(shape = var_6605, x = linear_105_cast_fp16)[name = string("op_6606_cast_fp16")]; tensor layers_15_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238182400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239231040))))[name = string("layers_15_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_k_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor var_6617 = const()[name = string("op_6617"), val = tensor([1, 1, 8, 128])]; tensor var_6618_cast_fp16 = reshape(shape = var_6617, x = linear_106_cast_fp16)[name = string("op_6618_cast_fp16")]; tensor layers_15_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239231616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240280256))))[name = string("layers_15_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_v_proj_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("linear_107_cast_fp16")]; fp16 var_6637_promoted_to_fp16 = const()[name = string("op_6637_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6643_cast_fp16 = pow(x = var_6606_cast_fp16, y = var_6637_promoted_to_fp16)[name = string("op_6643_cast_fp16")]; bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; tensor const_342 = const()[name = string("const_342"), val = tensor([3])]; tensor variance_123_cast_fp16 = reduce_mean(axes = const_342, keep_dims = variance_123_keep_dims_0, x = var_6643_cast_fp16)[name = string("variance_123_cast_fp16")]; tensor const_343_to_fp16 = const()[name = string("const_343_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240280832)))]; tensor var_6647_cast_fp16 = mul(x = const_343_to_fp16, y = var_6606_cast_fp16)[name = string("op_6647_cast_fp16")]; fp16 var_6648_to_fp16 = const()[name = string("op_6648_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6649_cast_fp16 = add(x = variance_123_cast_fp16, y = var_6648_to_fp16)[name = string("op_6649_cast_fp16")]; fp32 var_6650_epsilon_0 = const()[name = string("op_6650_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6650_cast_fp16 = rsqrt(epsilon = var_6650_epsilon_0, x = var_6649_cast_fp16)[name = string("op_6650_cast_fp16")]; tensor q_91_cast_fp16 = mul(x = var_6647_cast_fp16, y = var_6650_cast_fp16)[name = string("q_91_cast_fp16")]; fp16 var_6655_promoted_to_fp16 = const()[name = string("op_6655_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6661_cast_fp16 = pow(x = var_6618_cast_fp16, y = var_6655_promoted_to_fp16)[name = string("op_6661_cast_fp16")]; bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; tensor const_344 = const()[name = string("const_344"), val = tensor([3])]; tensor variance_125_cast_fp16 = reduce_mean(axes = const_344, keep_dims = variance_125_keep_dims_0, x = var_6661_cast_fp16)[name = string("variance_125_cast_fp16")]; tensor const_345_to_fp16 = const()[name = string("const_345_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240281152)))]; tensor var_6665_cast_fp16 = mul(x = const_345_to_fp16, y = var_6618_cast_fp16)[name = string("op_6665_cast_fp16")]; fp16 var_6666_to_fp16 = const()[name = string("op_6666_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6667_cast_fp16 = add(x = variance_125_cast_fp16, y = var_6666_to_fp16)[name = string("op_6667_cast_fp16")]; fp32 var_6668_epsilon_0 = const()[name = string("op_6668_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6668_cast_fp16 = rsqrt(epsilon = var_6668_epsilon_0, x = var_6667_cast_fp16)[name = string("op_6668_cast_fp16")]; tensor k_91_cast_fp16 = mul(x = var_6665_cast_fp16, y = var_6668_cast_fp16)[name = string("k_91_cast_fp16")]; tensor var_6683_cast_fp16 = mul(x = q_91_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6683_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_91_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_91_cast_fp16)[name = string("x2_61_cast_fp16")]; fp16 const_156_promoted_to_fp16 = const()[name = string("const_156_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6704_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_156_promoted_to_fp16)[name = string("op_6704_cast_fp16")]; int32 var_6706 = const()[name = string("op_6706"), val = int32(-1)]; bool var_6707_interleave_0 = const()[name = string("op_6707_interleave_0"), val = bool(false)]; tensor var_6707_cast_fp16 = concat(axis = var_6706, interleave = var_6707_interleave_0, values = (var_6704_cast_fp16, x1_61_cast_fp16))[name = string("op_6707_cast_fp16")]; tensor var_6708_cast_fp16 = mul(x = var_6707_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6708_cast_fp16")]; tensor q_95_cast_fp16 = add(x = var_6683_cast_fp16, y = var_6708_cast_fp16)[name = string("q_95_cast_fp16")]; tensor var_6711_cast_fp16 = mul(x = k_91_cast_fp16, y = cos_1_cast_fp16)[name = string("op_6711_cast_fp16")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_91_cast_fp16)[name = string("x1_63_cast_fp16")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_91_cast_fp16)[name = string("x2_63_cast_fp16")]; fp16 const_159_promoted_to_fp16 = const()[name = string("const_159_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_6732_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_159_promoted_to_fp16)[name = string("op_6732_cast_fp16")]; int32 var_6734 = const()[name = string("op_6734"), val = int32(-1)]; bool var_6735_interleave_0 = const()[name = string("op_6735_interleave_0"), val = bool(false)]; tensor var_6735_cast_fp16 = concat(axis = var_6734, interleave = var_6735_interleave_0, values = (var_6732_cast_fp16, x1_63_cast_fp16))[name = string("op_6735_cast_fp16")]; tensor var_6736_cast_fp16 = mul(x = var_6735_cast_fp16, y = sin_1_cast_fp16)[name = string("op_6736_cast_fp16")]; tensor k_95_cast_fp16 = add(x = var_6711_cast_fp16, y = var_6736_cast_fp16)[name = string("k_95_cast_fp16")]; tensor var_6743 = const()[name = string("op_6743"), val = tensor([1, 1024, 1, 1])]; tensor nk_31_cast_fp16 = reshape(shape = var_6743, x = k_95_cast_fp16)[name = string("nk_31_cast_fp16")]; tensor var_6749 = const()[name = string("op_6749"), val = tensor([1, 1024, 1, 1])]; tensor nv_31_cast_fp16 = reshape(shape = var_6749, x = linear_107_cast_fp16)[name = string("nv_31_cast_fp16")]; tensor var_6754_cast_fp16 = mul(x = var_6543_cast_fp16, y = var_1203_cast_fp16)[name = string("op_6754_cast_fp16")]; tensor var_6755_cast_fp16 = mul(x = nk_31_cast_fp16, y = update_mask_cast_fp16)[name = string("op_6755_cast_fp16")]; tensor lkc_63_cast_fp16 = add(x = var_6754_cast_fp16, y = var_6755_cast_fp16)[name = string("lkc_63_cast_fp16")]; tensor var_6761_cast_fp16 = mul(x = var_6563_cast_fp16, y = var_1203_cast_fp16)[name = string("op_6761_cast_fp16")]; tensor var_6762_cast_fp16 = mul(x = nv_31_cast_fp16, y = update_mask_cast_fp16)[name = string("op_6762_cast_fp16")]; tensor lvc_63_cast_fp16 = add(x = var_6761_cast_fp16, y = var_6762_cast_fp16)[name = string("lvc_63_cast_fp16")]; tensor var_6766_axes_0 = const()[name = string("op_6766_axes_0"), val = tensor([2])]; tensor var_6766_cast_fp16 = squeeze(axes = var_6766_axes_0, x = lkc_63_cast_fp16)[name = string("op_6766_cast_fp16")]; tensor var_6771 = const()[name = string("op_6771"), val = tensor([1, 8, 128, 256])]; tensor kc_61_cast_fp16 = reshape(shape = var_6771, x = var_6766_cast_fp16)[name = string("kc_61_cast_fp16")]; tensor var_6774_axes_0 = const()[name = string("op_6774_axes_0"), val = tensor([2])]; tensor var_6774_cast_fp16 = squeeze(axes = var_6774_axes_0, x = lvc_63_cast_fp16)[name = string("op_6774_cast_fp16")]; tensor var_6779 = const()[name = string("op_6779"), val = tensor([1, 8, 128, 256])]; tensor vc_61_cast_fp16 = reshape(shape = var_6779, x = var_6774_cast_fp16)[name = string("vc_61_cast_fp16")]; tensor var_6782_axes_0 = const()[name = string("op_6782_axes_0"), val = tensor([2])]; tensor var_6782_cast_fp16 = expand_dims(axes = var_6782_axes_0, x = kc_61_cast_fp16)[name = string("op_6782_cast_fp16")]; tensor var_6790_reps_0 = const()[name = string("op_6790_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6790_cast_fp16 = tile(reps = var_6790_reps_0, x = var_6782_cast_fp16)[name = string("op_6790_cast_fp16")]; tensor var_6795 = const()[name = string("op_6795"), val = tensor([1, 16, 128, 256])]; tensor kc_63_cast_fp16 = reshape(shape = var_6795, x = var_6790_cast_fp16)[name = string("kc_63_cast_fp16")]; tensor var_6798_axes_0 = const()[name = string("op_6798_axes_0"), val = tensor([2])]; tensor var_6798_cast_fp16 = expand_dims(axes = var_6798_axes_0, x = vc_61_cast_fp16)[name = string("op_6798_cast_fp16")]; tensor var_6806_reps_0 = const()[name = string("op_6806_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_6806_cast_fp16 = tile(reps = var_6806_reps_0, x = var_6798_cast_fp16)[name = string("op_6806_cast_fp16")]; tensor var_6811 = const()[name = string("op_6811"), val = tensor([1, 16, 128, 256])]; tensor vc_63_cast_fp16 = reshape(shape = var_6811, x = var_6806_cast_fp16)[name = string("vc_63_cast_fp16")]; tensor var_6815_perm_0 = const()[name = string("op_6815_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_6816_transpose_x_0 = const()[name = string("op_6816_transpose_x_0"), val = bool(false)]; bool var_6816_transpose_y_0 = const()[name = string("op_6816_transpose_y_0"), val = bool(false)]; tensor var_6815_cast_fp16 = transpose(perm = var_6815_perm_0, x = q_95_cast_fp16)[name = string("transpose_25")]; tensor var_6816_cast_fp16 = matmul(transpose_x = var_6816_transpose_x_0, transpose_y = var_6816_transpose_y_0, x = var_6815_cast_fp16, y = kc_63_cast_fp16)[name = string("op_6816_cast_fp16")]; fp16 _inversed_aw_121_y_0_to_fp16 = const()[name = string("_inversed_aw_121_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_121_cast_fp16 = mul(x = var_6816_cast_fp16, y = _inversed_aw_121_y_0_to_fp16)[name = string("_inversed_aw_121_cast_fp16")]; tensor aw_123_cast_fp16 = add(x = _inversed_aw_121_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_123_cast_fp16")]; int32 var_6830 = const()[name = string("op_6830"), val = int32(-1)]; tensor aw_127_cast_fp16 = softmax(axis = var_6830, x = aw_123_cast_fp16)[name = string("aw_127_cast_fp16")]; bool var_6836_transpose_x_1 = const()[name = string("op_6836_transpose_x_1"), val = bool(false)]; bool var_6836_transpose_y_1 = const()[name = string("op_6836_transpose_y_1"), val = bool(true)]; tensor var_6836_cast_fp16 = matmul(transpose_x = var_6836_transpose_x_1, transpose_y = var_6836_transpose_y_1, x = aw_127_cast_fp16, y = vc_63_cast_fp16)[name = string("op_6836_cast_fp16")]; tensor var_6839_perm_0 = const()[name = string("op_6839_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_6843 = const()[name = string("op_6843"), val = tensor([1, 1, -1])]; tensor var_6839_cast_fp16 = transpose(perm = var_6839_perm_0, x = var_6836_cast_fp16)[name = string("transpose_24")]; tensor input_153_cast_fp16 = reshape(shape = var_6843, x = var_6839_cast_fp16)[name = string("input_153_cast_fp16")]; tensor layers_15_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240281472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242378688))))[name = string("layers_15_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_108_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_self_attn_o_proj_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor var_6849_axes_0 = const()[name = string("op_6849_axes_0"), val = tensor([0])]; tensor var_6849_cast_fp16 = squeeze(axes = var_6849_axes_0, x = linear_108_cast_fp16)[name = string("op_6849_cast_fp16")]; tensor var_6851_axes_0 = const()[name = string("op_6851_axes_0"), val = tensor([0])]; tensor var_6851_cast_fp16 = squeeze(axes = var_6851_axes_0, x = var_6849_cast_fp16)[name = string("op_6851_cast_fp16")]; tensor var_6853_axes_0 = const()[name = string("op_6853_axes_0"), val = tensor([-1])]; tensor var_6853_cast_fp16 = expand_dims(axes = var_6853_axes_0, x = var_6851_cast_fp16)[name = string("op_6853_cast_fp16")]; tensor ao_31_axes_0 = const()[name = string("ao_31_axes_0"), val = tensor([-1])]; tensor ao_31_cast_fp16 = expand_dims(axes = ao_31_axes_0, x = var_6853_cast_fp16)[name = string("ao_31_cast_fp16")]; tensor hidden_61_cast_fp16 = add(x = hidden_59_cast_fp16, y = ao_31_cast_fp16)[name = string("hidden_61_cast_fp16")]; tensor var_6859_axes_0 = const()[name = string("op_6859_axes_0"), val = tensor([-1])]; tensor var_6859_cast_fp16 = squeeze(axes = var_6859_axes_0, x = hidden_61_cast_fp16)[name = string("op_6859_cast_fp16")]; tensor var_6861_axes_0 = const()[name = string("op_6861_axes_0"), val = tensor([-1])]; tensor var_6861_cast_fp16 = squeeze(axes = var_6861_axes_0, x = var_6859_cast_fp16)[name = string("op_6861_cast_fp16")]; tensor hidden_states_253_axes_0 = const()[name = string("hidden_states_253_axes_0"), val = tensor([0])]; tensor hidden_states_253_cast_fp16 = expand_dims(axes = hidden_states_253_axes_0, x = var_6861_cast_fp16)[name = string("hidden_states_253_cast_fp16")]; fp16 var_6867_promoted_to_fp16 = const()[name = string("op_6867_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6873_cast_fp16 = pow(x = hidden_states_253_cast_fp16, y = var_6867_promoted_to_fp16)[name = string("op_6873_cast_fp16")]; tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([-1])]; bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; tensor variance_127_cast_fp16 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = var_6873_cast_fp16)[name = string("variance_127_cast_fp16")]; tensor const_160_to_fp16 = const()[name = string("const_160_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242379264)))]; tensor var_6877_cast_fp16 = mul(x = const_160_to_fp16, y = hidden_states_253_cast_fp16)[name = string("op_6877_cast_fp16")]; fp16 var_6878_to_fp16 = const()[name = string("op_6878_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6879_cast_fp16 = add(x = variance_127_cast_fp16, y = var_6878_to_fp16)[name = string("op_6879_cast_fp16")]; fp32 var_6880_epsilon_0 = const()[name = string("op_6880_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6880_cast_fp16 = rsqrt(epsilon = var_6880_epsilon_0, x = var_6879_cast_fp16)[name = string("op_6880_cast_fp16")]; tensor input_155_cast_fp16 = mul(x = var_6877_cast_fp16, y = var_6880_cast_fp16)[name = string("input_155_cast_fp16")]; tensor layers_15_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242381376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245527168))))[name = string("layers_15_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_gate_proj_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor var_6888_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_6888_cast_fp16")]; tensor layers_15_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248673536))))[name = string("layers_15_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_15_mlp_up_proj_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("linear_110_cast_fp16")]; tensor input_159_cast_fp16 = mul(x = var_6888_cast_fp16, y = linear_110_cast_fp16)[name = string("input_159_cast_fp16")]; tensor layers_15_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248674112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251819904))))[name = string("layers_15_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_111_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_15_mlp_down_proj_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor var_6895_axes_0 = const()[name = string("op_6895_axes_0"), val = tensor([0])]; tensor var_6895_cast_fp16 = squeeze(axes = var_6895_axes_0, x = linear_111_cast_fp16)[name = string("op_6895_cast_fp16")]; tensor var_6897_axes_0 = const()[name = string("op_6897_axes_0"), val = tensor([0])]; tensor var_6897_cast_fp16 = squeeze(axes = var_6897_axes_0, x = var_6895_cast_fp16)[name = string("op_6897_cast_fp16")]; tensor var_6899_axes_0 = const()[name = string("op_6899_axes_0"), val = tensor([-1])]; tensor var_6899_cast_fp16 = expand_dims(axes = var_6899_axes_0, x = var_6897_cast_fp16)[name = string("op_6899_cast_fp16")]; tensor h_31_axes_0 = const()[name = string("h_31_axes_0"), val = tensor([-1])]; tensor h_31_cast_fp16 = expand_dims(axes = h_31_axes_0, x = var_6899_cast_fp16)[name = string("h_31_cast_fp16")]; tensor hidden_63_cast_fp16 = add(x = hidden_61_cast_fp16, y = h_31_cast_fp16)[name = string("hidden_63_cast_fp16")]; tensor var_6913_begin_0 = const()[name = string("op_6913_begin_0"), val = tensor([0, 16384, 0, 0])]; tensor var_6913_end_0 = const()[name = string("op_6913_end_0"), val = tensor([1, 17408, 1, 256])]; tensor var_6913_end_mask_0 = const()[name = string("op_6913_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6913_cast_fp16 = slice_by_index(begin = var_6913_begin_0, end = var_6913_end_0, end_mask = var_6913_end_mask_0, x = key_cache)[name = string("op_6913_cast_fp16")]; tensor var_6933_begin_0 = const()[name = string("op_6933_begin_0"), val = tensor([0, 16384, 0, 0])]; tensor var_6933_end_0 = const()[name = string("op_6933_end_0"), val = tensor([1, 17408, 1, 256])]; tensor var_6933_end_mask_0 = const()[name = string("op_6933_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6933_cast_fp16 = slice_by_index(begin = var_6933_begin_0, end = var_6933_end_0, end_mask = var_6933_end_mask_0, x = value_cache)[name = string("op_6933_cast_fp16")]; tensor var_6945_axes_0 = const()[name = string("op_6945_axes_0"), val = tensor([-1])]; tensor var_6945_cast_fp16 = squeeze(axes = var_6945_axes_0, x = hidden_63_cast_fp16)[name = string("op_6945_cast_fp16")]; tensor var_6947_axes_0 = const()[name = string("op_6947_axes_0"), val = tensor([-1])]; tensor var_6947_cast_fp16 = squeeze(axes = var_6947_axes_0, x = var_6945_cast_fp16)[name = string("op_6947_cast_fp16")]; tensor hidden_states_257_axes_0 = const()[name = string("hidden_states_257_axes_0"), val = tensor([0])]; tensor hidden_states_257_cast_fp16 = expand_dims(axes = hidden_states_257_axes_0, x = var_6947_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; fp16 var_6953_promoted_to_fp16 = const()[name = string("op_6953_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_6959_cast_fp16 = pow(x = hidden_states_257_cast_fp16, y = var_6953_promoted_to_fp16)[name = string("op_6959_cast_fp16")]; tensor variance_129_axes_0 = const()[name = string("variance_129_axes_0"), val = tensor([-1])]; bool variance_129_keep_dims_0 = const()[name = string("variance_129_keep_dims_0"), val = bool(true)]; tensor variance_129_cast_fp16 = reduce_mean(axes = variance_129_axes_0, keep_dims = variance_129_keep_dims_0, x = var_6959_cast_fp16)[name = string("variance_129_cast_fp16")]; tensor const_161_to_fp16 = const()[name = string("const_161_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251820480)))]; tensor var_6963_cast_fp16 = mul(x = const_161_to_fp16, y = hidden_states_257_cast_fp16)[name = string("op_6963_cast_fp16")]; fp16 var_6964_to_fp16 = const()[name = string("op_6964_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_6965_cast_fp16 = add(x = variance_129_cast_fp16, y = var_6964_to_fp16)[name = string("op_6965_cast_fp16")]; fp32 var_6966_epsilon_0 = const()[name = string("op_6966_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_6966_cast_fp16 = rsqrt(epsilon = var_6966_epsilon_0, x = var_6965_cast_fp16)[name = string("op_6966_cast_fp16")]; tensor input_161_cast_fp16 = mul(x = var_6963_cast_fp16, y = var_6966_cast_fp16)[name = string("input_161_cast_fp16")]; tensor layers_16_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(251822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253919808))))[name = string("layers_16_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_16_self_attn_q_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor var_6975 = const()[name = string("op_6975"), val = tensor([1, 1, 16, 128])]; tensor var_6976_cast_fp16 = reshape(shape = var_6975, x = linear_112_cast_fp16)[name = string("op_6976_cast_fp16")]; tensor layers_16_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253920384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254969024))))[name = string("layers_16_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_k_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor var_6987 = const()[name = string("op_6987"), val = tensor([1, 1, 8, 128])]; tensor var_6988_cast_fp16 = reshape(shape = var_6987, x = linear_113_cast_fp16)[name = string("op_6988_cast_fp16")]; tensor layers_16_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254969600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256018240))))[name = string("layers_16_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_v_proj_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("linear_114_cast_fp16")]; fp16 var_7007_promoted_to_fp16 = const()[name = string("op_7007_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7013_cast_fp16 = pow(x = var_6976_cast_fp16, y = var_7007_promoted_to_fp16)[name = string("op_7013_cast_fp16")]; bool variance_131_keep_dims_0 = const()[name = string("variance_131_keep_dims_0"), val = bool(true)]; tensor const_346 = const()[name = string("const_346"), val = tensor([3])]; tensor variance_131_cast_fp16 = reduce_mean(axes = const_346, keep_dims = variance_131_keep_dims_0, x = var_7013_cast_fp16)[name = string("variance_131_cast_fp16")]; tensor const_347_to_fp16 = const()[name = string("const_347_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256018816)))]; tensor var_7017_cast_fp16 = mul(x = const_347_to_fp16, y = var_6976_cast_fp16)[name = string("op_7017_cast_fp16")]; fp16 var_7018_to_fp16 = const()[name = string("op_7018_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7019_cast_fp16 = add(x = variance_131_cast_fp16, y = var_7018_to_fp16)[name = string("op_7019_cast_fp16")]; fp32 var_7020_epsilon_0 = const()[name = string("op_7020_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7020_cast_fp16 = rsqrt(epsilon = var_7020_epsilon_0, x = var_7019_cast_fp16)[name = string("op_7020_cast_fp16")]; tensor q_97_cast_fp16 = mul(x = var_7017_cast_fp16, y = var_7020_cast_fp16)[name = string("q_97_cast_fp16")]; fp16 var_7025_promoted_to_fp16 = const()[name = string("op_7025_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7031_cast_fp16 = pow(x = var_6988_cast_fp16, y = var_7025_promoted_to_fp16)[name = string("op_7031_cast_fp16")]; bool variance_133_keep_dims_0 = const()[name = string("variance_133_keep_dims_0"), val = bool(true)]; tensor const_348 = const()[name = string("const_348"), val = tensor([3])]; tensor variance_133_cast_fp16 = reduce_mean(axes = const_348, keep_dims = variance_133_keep_dims_0, x = var_7031_cast_fp16)[name = string("variance_133_cast_fp16")]; tensor const_349_to_fp16 = const()[name = string("const_349_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256019136)))]; tensor var_7035_cast_fp16 = mul(x = const_349_to_fp16, y = var_6988_cast_fp16)[name = string("op_7035_cast_fp16")]; fp16 var_7036_to_fp16 = const()[name = string("op_7036_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7037_cast_fp16 = add(x = variance_133_cast_fp16, y = var_7036_to_fp16)[name = string("op_7037_cast_fp16")]; fp32 var_7038_epsilon_0 = const()[name = string("op_7038_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7038_cast_fp16 = rsqrt(epsilon = var_7038_epsilon_0, x = var_7037_cast_fp16)[name = string("op_7038_cast_fp16")]; tensor k_97_cast_fp16 = mul(x = var_7035_cast_fp16, y = var_7038_cast_fp16)[name = string("k_97_cast_fp16")]; tensor var_7053_cast_fp16 = mul(x = q_97_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7053_cast_fp16")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_97_cast_fp16)[name = string("x1_65_cast_fp16")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_97_cast_fp16)[name = string("x2_65_cast_fp16")]; fp16 const_166_promoted_to_fp16 = const()[name = string("const_166_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7074_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_166_promoted_to_fp16)[name = string("op_7074_cast_fp16")]; int32 var_7076 = const()[name = string("op_7076"), val = int32(-1)]; bool var_7077_interleave_0 = const()[name = string("op_7077_interleave_0"), val = bool(false)]; tensor var_7077_cast_fp16 = concat(axis = var_7076, interleave = var_7077_interleave_0, values = (var_7074_cast_fp16, x1_65_cast_fp16))[name = string("op_7077_cast_fp16")]; tensor var_7078_cast_fp16 = mul(x = var_7077_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7078_cast_fp16")]; tensor q_101_cast_fp16 = add(x = var_7053_cast_fp16, y = var_7078_cast_fp16)[name = string("q_101_cast_fp16")]; tensor var_7081_cast_fp16 = mul(x = k_97_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7081_cast_fp16")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_97_cast_fp16)[name = string("x1_67_cast_fp16")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_97_cast_fp16)[name = string("x2_67_cast_fp16")]; fp16 const_169_promoted_to_fp16 = const()[name = string("const_169_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7102_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_169_promoted_to_fp16)[name = string("op_7102_cast_fp16")]; int32 var_7104 = const()[name = string("op_7104"), val = int32(-1)]; bool var_7105_interleave_0 = const()[name = string("op_7105_interleave_0"), val = bool(false)]; tensor var_7105_cast_fp16 = concat(axis = var_7104, interleave = var_7105_interleave_0, values = (var_7102_cast_fp16, x1_67_cast_fp16))[name = string("op_7105_cast_fp16")]; tensor var_7106_cast_fp16 = mul(x = var_7105_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7106_cast_fp16")]; tensor k_101_cast_fp16 = add(x = var_7081_cast_fp16, y = var_7106_cast_fp16)[name = string("k_101_cast_fp16")]; tensor var_7113 = const()[name = string("op_7113"), val = tensor([1, 1024, 1, 1])]; tensor nk_33_cast_fp16 = reshape(shape = var_7113, x = k_101_cast_fp16)[name = string("nk_33_cast_fp16")]; tensor var_7119 = const()[name = string("op_7119"), val = tensor([1, 1024, 1, 1])]; tensor nv_33_cast_fp16 = reshape(shape = var_7119, x = linear_114_cast_fp16)[name = string("nv_33_cast_fp16")]; tensor var_7124_cast_fp16 = mul(x = var_6913_cast_fp16, y = var_1203_cast_fp16)[name = string("op_7124_cast_fp16")]; tensor var_7125_cast_fp16 = mul(x = nk_33_cast_fp16, y = update_mask_cast_fp16)[name = string("op_7125_cast_fp16")]; tensor lkc_67_cast_fp16 = add(x = var_7124_cast_fp16, y = var_7125_cast_fp16)[name = string("lkc_67_cast_fp16")]; tensor var_7131_cast_fp16 = mul(x = var_6933_cast_fp16, y = var_1203_cast_fp16)[name = string("op_7131_cast_fp16")]; tensor var_7132_cast_fp16 = mul(x = nv_33_cast_fp16, y = update_mask_cast_fp16)[name = string("op_7132_cast_fp16")]; tensor lvc_67_cast_fp16 = add(x = var_7131_cast_fp16, y = var_7132_cast_fp16)[name = string("lvc_67_cast_fp16")]; tensor var_7136_axes_0 = const()[name = string("op_7136_axes_0"), val = tensor([2])]; tensor var_7136_cast_fp16 = squeeze(axes = var_7136_axes_0, x = lkc_67_cast_fp16)[name = string("op_7136_cast_fp16")]; tensor var_7141 = const()[name = string("op_7141"), val = tensor([1, 8, 128, 256])]; tensor kc_65_cast_fp16 = reshape(shape = var_7141, x = var_7136_cast_fp16)[name = string("kc_65_cast_fp16")]; tensor var_7144_axes_0 = const()[name = string("op_7144_axes_0"), val = tensor([2])]; tensor var_7144_cast_fp16 = squeeze(axes = var_7144_axes_0, x = lvc_67_cast_fp16)[name = string("op_7144_cast_fp16")]; tensor var_7149 = const()[name = string("op_7149"), val = tensor([1, 8, 128, 256])]; tensor vc_65_cast_fp16 = reshape(shape = var_7149, x = var_7144_cast_fp16)[name = string("vc_65_cast_fp16")]; tensor var_7152_axes_0 = const()[name = string("op_7152_axes_0"), val = tensor([2])]; tensor var_7152_cast_fp16 = expand_dims(axes = var_7152_axes_0, x = kc_65_cast_fp16)[name = string("op_7152_cast_fp16")]; tensor var_7160_reps_0 = const()[name = string("op_7160_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7160_cast_fp16 = tile(reps = var_7160_reps_0, x = var_7152_cast_fp16)[name = string("op_7160_cast_fp16")]; tensor var_7165 = const()[name = string("op_7165"), val = tensor([1, 16, 128, 256])]; tensor kc_67_cast_fp16 = reshape(shape = var_7165, x = var_7160_cast_fp16)[name = string("kc_67_cast_fp16")]; tensor var_7168_axes_0 = const()[name = string("op_7168_axes_0"), val = tensor([2])]; tensor var_7168_cast_fp16 = expand_dims(axes = var_7168_axes_0, x = vc_65_cast_fp16)[name = string("op_7168_cast_fp16")]; tensor var_7176_reps_0 = const()[name = string("op_7176_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7176_cast_fp16 = tile(reps = var_7176_reps_0, x = var_7168_cast_fp16)[name = string("op_7176_cast_fp16")]; tensor var_7181 = const()[name = string("op_7181"), val = tensor([1, 16, 128, 256])]; tensor vc_67_cast_fp16 = reshape(shape = var_7181, x = var_7176_cast_fp16)[name = string("vc_67_cast_fp16")]; tensor var_7185_perm_0 = const()[name = string("op_7185_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_7186_transpose_x_0 = const()[name = string("op_7186_transpose_x_0"), val = bool(false)]; bool var_7186_transpose_y_0 = const()[name = string("op_7186_transpose_y_0"), val = bool(false)]; tensor var_7185_cast_fp16 = transpose(perm = var_7185_perm_0, x = q_101_cast_fp16)[name = string("transpose_23")]; tensor var_7186_cast_fp16 = matmul(transpose_x = var_7186_transpose_x_0, transpose_y = var_7186_transpose_y_0, x = var_7185_cast_fp16, y = kc_67_cast_fp16)[name = string("op_7186_cast_fp16")]; fp16 _inversed_aw_129_y_0_to_fp16 = const()[name = string("_inversed_aw_129_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_129_cast_fp16 = mul(x = var_7186_cast_fp16, y = _inversed_aw_129_y_0_to_fp16)[name = string("_inversed_aw_129_cast_fp16")]; tensor aw_131_cast_fp16 = add(x = _inversed_aw_129_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_131_cast_fp16")]; int32 var_7200 = const()[name = string("op_7200"), val = int32(-1)]; tensor aw_135_cast_fp16 = softmax(axis = var_7200, x = aw_131_cast_fp16)[name = string("aw_135_cast_fp16")]; bool var_7206_transpose_x_1 = const()[name = string("op_7206_transpose_x_1"), val = bool(false)]; bool var_7206_transpose_y_1 = const()[name = string("op_7206_transpose_y_1"), val = bool(true)]; tensor var_7206_cast_fp16 = matmul(transpose_x = var_7206_transpose_x_1, transpose_y = var_7206_transpose_y_1, x = aw_135_cast_fp16, y = vc_67_cast_fp16)[name = string("op_7206_cast_fp16")]; tensor var_7209_perm_0 = const()[name = string("op_7209_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7213 = const()[name = string("op_7213"), val = tensor([1, 1, -1])]; tensor var_7209_cast_fp16 = transpose(perm = var_7209_perm_0, x = var_7206_cast_fp16)[name = string("transpose_22")]; tensor input_163_cast_fp16 = reshape(shape = var_7213, x = var_7209_cast_fp16)[name = string("input_163_cast_fp16")]; tensor layers_16_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256019456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258116672))))[name = string("layers_16_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_self_attn_o_proj_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor var_7219_axes_0 = const()[name = string("op_7219_axes_0"), val = tensor([0])]; tensor var_7219_cast_fp16 = squeeze(axes = var_7219_axes_0, x = linear_115_cast_fp16)[name = string("op_7219_cast_fp16")]; tensor var_7221_axes_0 = const()[name = string("op_7221_axes_0"), val = tensor([0])]; tensor var_7221_cast_fp16 = squeeze(axes = var_7221_axes_0, x = var_7219_cast_fp16)[name = string("op_7221_cast_fp16")]; tensor var_7223_axes_0 = const()[name = string("op_7223_axes_0"), val = tensor([-1])]; tensor var_7223_cast_fp16 = expand_dims(axes = var_7223_axes_0, x = var_7221_cast_fp16)[name = string("op_7223_cast_fp16")]; tensor ao_33_axes_0 = const()[name = string("ao_33_axes_0"), val = tensor([-1])]; tensor ao_33_cast_fp16 = expand_dims(axes = ao_33_axes_0, x = var_7223_cast_fp16)[name = string("ao_33_cast_fp16")]; tensor hidden_65_cast_fp16 = add(x = hidden_63_cast_fp16, y = ao_33_cast_fp16)[name = string("hidden_65_cast_fp16")]; tensor var_7229_axes_0 = const()[name = string("op_7229_axes_0"), val = tensor([-1])]; tensor var_7229_cast_fp16 = squeeze(axes = var_7229_axes_0, x = hidden_65_cast_fp16)[name = string("op_7229_cast_fp16")]; tensor var_7231_axes_0 = const()[name = string("op_7231_axes_0"), val = tensor([-1])]; tensor var_7231_cast_fp16 = squeeze(axes = var_7231_axes_0, x = var_7229_cast_fp16)[name = string("op_7231_cast_fp16")]; tensor hidden_states_269_axes_0 = const()[name = string("hidden_states_269_axes_0"), val = tensor([0])]; tensor hidden_states_269_cast_fp16 = expand_dims(axes = hidden_states_269_axes_0, x = var_7231_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; fp16 var_7237_promoted_to_fp16 = const()[name = string("op_7237_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7243_cast_fp16 = pow(x = hidden_states_269_cast_fp16, y = var_7237_promoted_to_fp16)[name = string("op_7243_cast_fp16")]; tensor variance_135_axes_0 = const()[name = string("variance_135_axes_0"), val = tensor([-1])]; bool variance_135_keep_dims_0 = const()[name = string("variance_135_keep_dims_0"), val = bool(true)]; tensor variance_135_cast_fp16 = reduce_mean(axes = variance_135_axes_0, keep_dims = variance_135_keep_dims_0, x = var_7243_cast_fp16)[name = string("variance_135_cast_fp16")]; tensor const_170_to_fp16 = const()[name = string("const_170_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258117248)))]; tensor var_7247_cast_fp16 = mul(x = const_170_to_fp16, y = hidden_states_269_cast_fp16)[name = string("op_7247_cast_fp16")]; fp16 var_7248_to_fp16 = const()[name = string("op_7248_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7249_cast_fp16 = add(x = variance_135_cast_fp16, y = var_7248_to_fp16)[name = string("op_7249_cast_fp16")]; fp32 var_7250_epsilon_0 = const()[name = string("op_7250_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7250_cast_fp16 = rsqrt(epsilon = var_7250_epsilon_0, x = var_7249_cast_fp16)[name = string("op_7250_cast_fp16")]; tensor input_165_cast_fp16 = mul(x = var_7247_cast_fp16, y = var_7250_cast_fp16)[name = string("input_165_cast_fp16")]; tensor layers_16_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258119360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261265152))))[name = string("layers_16_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_gate_proj_weight_to_fp16_palettized, x = input_165_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor var_7258_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_7258_cast_fp16")]; tensor layers_16_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261265728))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264411520))))[name = string("layers_16_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_16_mlp_up_proj_weight_to_fp16_palettized, x = input_165_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor input_169_cast_fp16 = mul(x = var_7258_cast_fp16, y = linear_117_cast_fp16)[name = string("input_169_cast_fp16")]; tensor layers_16_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264412096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267557888))))[name = string("layers_16_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_16_mlp_down_proj_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = string("linear_118_cast_fp16")]; tensor var_7265_axes_0 = const()[name = string("op_7265_axes_0"), val = tensor([0])]; tensor var_7265_cast_fp16 = squeeze(axes = var_7265_axes_0, x = linear_118_cast_fp16)[name = string("op_7265_cast_fp16")]; tensor var_7267_axes_0 = const()[name = string("op_7267_axes_0"), val = tensor([0])]; tensor var_7267_cast_fp16 = squeeze(axes = var_7267_axes_0, x = var_7265_cast_fp16)[name = string("op_7267_cast_fp16")]; tensor var_7269_axes_0 = const()[name = string("op_7269_axes_0"), val = tensor([-1])]; tensor var_7269_cast_fp16 = expand_dims(axes = var_7269_axes_0, x = var_7267_cast_fp16)[name = string("op_7269_cast_fp16")]; tensor h_33_axes_0 = const()[name = string("h_33_axes_0"), val = tensor([-1])]; tensor h_33_cast_fp16 = expand_dims(axes = h_33_axes_0, x = var_7269_cast_fp16)[name = string("h_33_cast_fp16")]; tensor hidden_67_cast_fp16 = add(x = hidden_65_cast_fp16, y = h_33_cast_fp16)[name = string("hidden_67_cast_fp16")]; tensor var_7283_begin_0 = const()[name = string("op_7283_begin_0"), val = tensor([0, 17408, 0, 0])]; tensor var_7283_end_0 = const()[name = string("op_7283_end_0"), val = tensor([1, 18432, 1, 256])]; tensor var_7283_end_mask_0 = const()[name = string("op_7283_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7283_cast_fp16 = slice_by_index(begin = var_7283_begin_0, end = var_7283_end_0, end_mask = var_7283_end_mask_0, x = key_cache)[name = string("op_7283_cast_fp16")]; tensor var_7303_begin_0 = const()[name = string("op_7303_begin_0"), val = tensor([0, 17408, 0, 0])]; tensor var_7303_end_0 = const()[name = string("op_7303_end_0"), val = tensor([1, 18432, 1, 256])]; tensor var_7303_end_mask_0 = const()[name = string("op_7303_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7303_cast_fp16 = slice_by_index(begin = var_7303_begin_0, end = var_7303_end_0, end_mask = var_7303_end_mask_0, x = value_cache)[name = string("op_7303_cast_fp16")]; tensor var_7315_axes_0 = const()[name = string("op_7315_axes_0"), val = tensor([-1])]; tensor var_7315_cast_fp16 = squeeze(axes = var_7315_axes_0, x = hidden_67_cast_fp16)[name = string("op_7315_cast_fp16")]; tensor var_7317_axes_0 = const()[name = string("op_7317_axes_0"), val = tensor([-1])]; tensor var_7317_cast_fp16 = squeeze(axes = var_7317_axes_0, x = var_7315_cast_fp16)[name = string("op_7317_cast_fp16")]; tensor hidden_states_273_axes_0 = const()[name = string("hidden_states_273_axes_0"), val = tensor([0])]; tensor hidden_states_273_cast_fp16 = expand_dims(axes = hidden_states_273_axes_0, x = var_7317_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; fp16 var_7323_promoted_to_fp16 = const()[name = string("op_7323_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7329_cast_fp16 = pow(x = hidden_states_273_cast_fp16, y = var_7323_promoted_to_fp16)[name = string("op_7329_cast_fp16")]; tensor variance_137_axes_0 = const()[name = string("variance_137_axes_0"), val = tensor([-1])]; bool variance_137_keep_dims_0 = const()[name = string("variance_137_keep_dims_0"), val = bool(true)]; tensor variance_137_cast_fp16 = reduce_mean(axes = variance_137_axes_0, keep_dims = variance_137_keep_dims_0, x = var_7329_cast_fp16)[name = string("variance_137_cast_fp16")]; tensor const_171_to_fp16 = const()[name = string("const_171_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267558464)))]; tensor var_7333_cast_fp16 = mul(x = const_171_to_fp16, y = hidden_states_273_cast_fp16)[name = string("op_7333_cast_fp16")]; fp16 var_7334_to_fp16 = const()[name = string("op_7334_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7335_cast_fp16 = add(x = variance_137_cast_fp16, y = var_7334_to_fp16)[name = string("op_7335_cast_fp16")]; fp32 var_7336_epsilon_0 = const()[name = string("op_7336_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7336_cast_fp16 = rsqrt(epsilon = var_7336_epsilon_0, x = var_7335_cast_fp16)[name = string("op_7336_cast_fp16")]; tensor input_171_cast_fp16 = mul(x = var_7333_cast_fp16, y = var_7336_cast_fp16)[name = string("input_171_cast_fp16")]; tensor layers_17_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267560576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269657792))))[name = string("layers_17_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_17_self_attn_q_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor var_7345 = const()[name = string("op_7345"), val = tensor([1, 1, 16, 128])]; tensor var_7346_cast_fp16 = reshape(shape = var_7345, x = linear_119_cast_fp16)[name = string("op_7346_cast_fp16")]; tensor layers_17_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269658368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270707008))))[name = string("layers_17_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_k_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor var_7357 = const()[name = string("op_7357"), val = tensor([1, 1, 8, 128])]; tensor var_7358_cast_fp16 = reshape(shape = var_7357, x = linear_120_cast_fp16)[name = string("op_7358_cast_fp16")]; tensor layers_17_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270707584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271756224))))[name = string("layers_17_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_v_proj_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("linear_121_cast_fp16")]; fp16 var_7377_promoted_to_fp16 = const()[name = string("op_7377_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7383_cast_fp16 = pow(x = var_7346_cast_fp16, y = var_7377_promoted_to_fp16)[name = string("op_7383_cast_fp16")]; bool variance_139_keep_dims_0 = const()[name = string("variance_139_keep_dims_0"), val = bool(true)]; tensor const_350 = const()[name = string("const_350"), val = tensor([3])]; tensor variance_139_cast_fp16 = reduce_mean(axes = const_350, keep_dims = variance_139_keep_dims_0, x = var_7383_cast_fp16)[name = string("variance_139_cast_fp16")]; tensor const_351_to_fp16 = const()[name = string("const_351_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271756800)))]; tensor var_7387_cast_fp16 = mul(x = const_351_to_fp16, y = var_7346_cast_fp16)[name = string("op_7387_cast_fp16")]; fp16 var_7388_to_fp16 = const()[name = string("op_7388_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7389_cast_fp16 = add(x = variance_139_cast_fp16, y = var_7388_to_fp16)[name = string("op_7389_cast_fp16")]; fp32 var_7390_epsilon_0 = const()[name = string("op_7390_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7390_cast_fp16 = rsqrt(epsilon = var_7390_epsilon_0, x = var_7389_cast_fp16)[name = string("op_7390_cast_fp16")]; tensor q_103_cast_fp16 = mul(x = var_7387_cast_fp16, y = var_7390_cast_fp16)[name = string("q_103_cast_fp16")]; fp16 var_7395_promoted_to_fp16 = const()[name = string("op_7395_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7401_cast_fp16 = pow(x = var_7358_cast_fp16, y = var_7395_promoted_to_fp16)[name = string("op_7401_cast_fp16")]; bool variance_141_keep_dims_0 = const()[name = string("variance_141_keep_dims_0"), val = bool(true)]; tensor const_352 = const()[name = string("const_352"), val = tensor([3])]; tensor variance_141_cast_fp16 = reduce_mean(axes = const_352, keep_dims = variance_141_keep_dims_0, x = var_7401_cast_fp16)[name = string("variance_141_cast_fp16")]; tensor const_353_to_fp16 = const()[name = string("const_353_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271757120)))]; tensor var_7405_cast_fp16 = mul(x = const_353_to_fp16, y = var_7358_cast_fp16)[name = string("op_7405_cast_fp16")]; fp16 var_7406_to_fp16 = const()[name = string("op_7406_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7407_cast_fp16 = add(x = variance_141_cast_fp16, y = var_7406_to_fp16)[name = string("op_7407_cast_fp16")]; fp32 var_7408_epsilon_0 = const()[name = string("op_7408_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7408_cast_fp16 = rsqrt(epsilon = var_7408_epsilon_0, x = var_7407_cast_fp16)[name = string("op_7408_cast_fp16")]; tensor k_103_cast_fp16 = mul(x = var_7405_cast_fp16, y = var_7408_cast_fp16)[name = string("k_103_cast_fp16")]; tensor var_7423_cast_fp16 = mul(x = q_103_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7423_cast_fp16")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_103_cast_fp16)[name = string("x1_69_cast_fp16")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_103_cast_fp16)[name = string("x2_69_cast_fp16")]; fp16 const_176_promoted_to_fp16 = const()[name = string("const_176_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7444_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_176_promoted_to_fp16)[name = string("op_7444_cast_fp16")]; int32 var_7446 = const()[name = string("op_7446"), val = int32(-1)]; bool var_7447_interleave_0 = const()[name = string("op_7447_interleave_0"), val = bool(false)]; tensor var_7447_cast_fp16 = concat(axis = var_7446, interleave = var_7447_interleave_0, values = (var_7444_cast_fp16, x1_69_cast_fp16))[name = string("op_7447_cast_fp16")]; tensor var_7448_cast_fp16 = mul(x = var_7447_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7448_cast_fp16")]; tensor q_107_cast_fp16 = add(x = var_7423_cast_fp16, y = var_7448_cast_fp16)[name = string("q_107_cast_fp16")]; tensor var_7451_cast_fp16 = mul(x = k_103_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7451_cast_fp16")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_103_cast_fp16)[name = string("x1_71_cast_fp16")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_103_cast_fp16)[name = string("x2_71_cast_fp16")]; fp16 const_179_promoted_to_fp16 = const()[name = string("const_179_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7472_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_179_promoted_to_fp16)[name = string("op_7472_cast_fp16")]; int32 var_7474 = const()[name = string("op_7474"), val = int32(-1)]; bool var_7475_interleave_0 = const()[name = string("op_7475_interleave_0"), val = bool(false)]; tensor var_7475_cast_fp16 = concat(axis = var_7474, interleave = var_7475_interleave_0, values = (var_7472_cast_fp16, x1_71_cast_fp16))[name = string("op_7475_cast_fp16")]; tensor var_7476_cast_fp16 = mul(x = var_7475_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7476_cast_fp16")]; tensor k_107_cast_fp16 = add(x = var_7451_cast_fp16, y = var_7476_cast_fp16)[name = string("k_107_cast_fp16")]; tensor var_7483 = const()[name = string("op_7483"), val = tensor([1, 1024, 1, 1])]; tensor nk_35_cast_fp16 = reshape(shape = var_7483, x = k_107_cast_fp16)[name = string("nk_35_cast_fp16")]; tensor var_7489 = const()[name = string("op_7489"), val = tensor([1, 1024, 1, 1])]; tensor nv_35_cast_fp16 = reshape(shape = var_7489, x = linear_121_cast_fp16)[name = string("nv_35_cast_fp16")]; tensor var_7494_cast_fp16 = mul(x = var_7283_cast_fp16, y = var_1203_cast_fp16)[name = string("op_7494_cast_fp16")]; tensor var_7495_cast_fp16 = mul(x = nk_35_cast_fp16, y = update_mask_cast_fp16)[name = string("op_7495_cast_fp16")]; tensor lkc_71_cast_fp16 = add(x = var_7494_cast_fp16, y = var_7495_cast_fp16)[name = string("lkc_71_cast_fp16")]; tensor var_7501_cast_fp16 = mul(x = var_7303_cast_fp16, y = var_1203_cast_fp16)[name = string("op_7501_cast_fp16")]; tensor var_7502_cast_fp16 = mul(x = nv_35_cast_fp16, y = update_mask_cast_fp16)[name = string("op_7502_cast_fp16")]; tensor lvc_71_cast_fp16 = add(x = var_7501_cast_fp16, y = var_7502_cast_fp16)[name = string("lvc_71_cast_fp16")]; tensor var_7506_axes_0 = const()[name = string("op_7506_axes_0"), val = tensor([2])]; tensor var_7506_cast_fp16 = squeeze(axes = var_7506_axes_0, x = lkc_71_cast_fp16)[name = string("op_7506_cast_fp16")]; tensor var_7511 = const()[name = string("op_7511"), val = tensor([1, 8, 128, 256])]; tensor kc_69_cast_fp16 = reshape(shape = var_7511, x = var_7506_cast_fp16)[name = string("kc_69_cast_fp16")]; tensor var_7514_axes_0 = const()[name = string("op_7514_axes_0"), val = tensor([2])]; tensor var_7514_cast_fp16 = squeeze(axes = var_7514_axes_0, x = lvc_71_cast_fp16)[name = string("op_7514_cast_fp16")]; tensor var_7519 = const()[name = string("op_7519"), val = tensor([1, 8, 128, 256])]; tensor vc_69_cast_fp16 = reshape(shape = var_7519, x = var_7514_cast_fp16)[name = string("vc_69_cast_fp16")]; tensor var_7522_axes_0 = const()[name = string("op_7522_axes_0"), val = tensor([2])]; tensor var_7522_cast_fp16 = expand_dims(axes = var_7522_axes_0, x = kc_69_cast_fp16)[name = string("op_7522_cast_fp16")]; tensor var_7530_reps_0 = const()[name = string("op_7530_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7530_cast_fp16 = tile(reps = var_7530_reps_0, x = var_7522_cast_fp16)[name = string("op_7530_cast_fp16")]; tensor var_7535 = const()[name = string("op_7535"), val = tensor([1, 16, 128, 256])]; tensor kc_71_cast_fp16 = reshape(shape = var_7535, x = var_7530_cast_fp16)[name = string("kc_71_cast_fp16")]; tensor var_7538_axes_0 = const()[name = string("op_7538_axes_0"), val = tensor([2])]; tensor var_7538_cast_fp16 = expand_dims(axes = var_7538_axes_0, x = vc_69_cast_fp16)[name = string("op_7538_cast_fp16")]; tensor var_7546_reps_0 = const()[name = string("op_7546_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7546_cast_fp16 = tile(reps = var_7546_reps_0, x = var_7538_cast_fp16)[name = string("op_7546_cast_fp16")]; tensor var_7551 = const()[name = string("op_7551"), val = tensor([1, 16, 128, 256])]; tensor vc_71_cast_fp16 = reshape(shape = var_7551, x = var_7546_cast_fp16)[name = string("vc_71_cast_fp16")]; tensor var_7555_perm_0 = const()[name = string("op_7555_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_7556_transpose_x_0 = const()[name = string("op_7556_transpose_x_0"), val = bool(false)]; bool var_7556_transpose_y_0 = const()[name = string("op_7556_transpose_y_0"), val = bool(false)]; tensor var_7555_cast_fp16 = transpose(perm = var_7555_perm_0, x = q_107_cast_fp16)[name = string("transpose_21")]; tensor var_7556_cast_fp16 = matmul(transpose_x = var_7556_transpose_x_0, transpose_y = var_7556_transpose_y_0, x = var_7555_cast_fp16, y = kc_71_cast_fp16)[name = string("op_7556_cast_fp16")]; fp16 _inversed_aw_137_y_0_to_fp16 = const()[name = string("_inversed_aw_137_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_137_cast_fp16 = mul(x = var_7556_cast_fp16, y = _inversed_aw_137_y_0_to_fp16)[name = string("_inversed_aw_137_cast_fp16")]; tensor aw_139_cast_fp16 = add(x = _inversed_aw_137_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_139_cast_fp16")]; int32 var_7570 = const()[name = string("op_7570"), val = int32(-1)]; tensor aw_143_cast_fp16 = softmax(axis = var_7570, x = aw_139_cast_fp16)[name = string("aw_143_cast_fp16")]; bool var_7576_transpose_x_1 = const()[name = string("op_7576_transpose_x_1"), val = bool(false)]; bool var_7576_transpose_y_1 = const()[name = string("op_7576_transpose_y_1"), val = bool(true)]; tensor var_7576_cast_fp16 = matmul(transpose_x = var_7576_transpose_x_1, transpose_y = var_7576_transpose_y_1, x = aw_143_cast_fp16, y = vc_71_cast_fp16)[name = string("op_7576_cast_fp16")]; tensor var_7579_perm_0 = const()[name = string("op_7579_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7583 = const()[name = string("op_7583"), val = tensor([1, 1, -1])]; tensor var_7579_cast_fp16 = transpose(perm = var_7579_perm_0, x = var_7576_cast_fp16)[name = string("transpose_20")]; tensor input_173_cast_fp16 = reshape(shape = var_7583, x = var_7579_cast_fp16)[name = string("input_173_cast_fp16")]; tensor layers_17_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271757440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273854656))))[name = string("layers_17_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_122_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_self_attn_o_proj_weight_to_fp16_palettized, x = input_173_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor var_7589_axes_0 = const()[name = string("op_7589_axes_0"), val = tensor([0])]; tensor var_7589_cast_fp16 = squeeze(axes = var_7589_axes_0, x = linear_122_cast_fp16)[name = string("op_7589_cast_fp16")]; tensor var_7591_axes_0 = const()[name = string("op_7591_axes_0"), val = tensor([0])]; tensor var_7591_cast_fp16 = squeeze(axes = var_7591_axes_0, x = var_7589_cast_fp16)[name = string("op_7591_cast_fp16")]; tensor var_7593_axes_0 = const()[name = string("op_7593_axes_0"), val = tensor([-1])]; tensor var_7593_cast_fp16 = expand_dims(axes = var_7593_axes_0, x = var_7591_cast_fp16)[name = string("op_7593_cast_fp16")]; tensor ao_35_axes_0 = const()[name = string("ao_35_axes_0"), val = tensor([-1])]; tensor ao_35_cast_fp16 = expand_dims(axes = ao_35_axes_0, x = var_7593_cast_fp16)[name = string("ao_35_cast_fp16")]; tensor hidden_69_cast_fp16 = add(x = hidden_67_cast_fp16, y = ao_35_cast_fp16)[name = string("hidden_69_cast_fp16")]; tensor var_7599_axes_0 = const()[name = string("op_7599_axes_0"), val = tensor([-1])]; tensor var_7599_cast_fp16 = squeeze(axes = var_7599_axes_0, x = hidden_69_cast_fp16)[name = string("op_7599_cast_fp16")]; tensor var_7601_axes_0 = const()[name = string("op_7601_axes_0"), val = tensor([-1])]; tensor var_7601_cast_fp16 = squeeze(axes = var_7601_axes_0, x = var_7599_cast_fp16)[name = string("op_7601_cast_fp16")]; tensor hidden_states_285_axes_0 = const()[name = string("hidden_states_285_axes_0"), val = tensor([0])]; tensor hidden_states_285_cast_fp16 = expand_dims(axes = hidden_states_285_axes_0, x = var_7601_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; fp16 var_7607_promoted_to_fp16 = const()[name = string("op_7607_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7613_cast_fp16 = pow(x = hidden_states_285_cast_fp16, y = var_7607_promoted_to_fp16)[name = string("op_7613_cast_fp16")]; tensor variance_143_axes_0 = const()[name = string("variance_143_axes_0"), val = tensor([-1])]; bool variance_143_keep_dims_0 = const()[name = string("variance_143_keep_dims_0"), val = bool(true)]; tensor variance_143_cast_fp16 = reduce_mean(axes = variance_143_axes_0, keep_dims = variance_143_keep_dims_0, x = var_7613_cast_fp16)[name = string("variance_143_cast_fp16")]; tensor const_180_to_fp16 = const()[name = string("const_180_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273855232)))]; tensor var_7617_cast_fp16 = mul(x = const_180_to_fp16, y = hidden_states_285_cast_fp16)[name = string("op_7617_cast_fp16")]; fp16 var_7618_to_fp16 = const()[name = string("op_7618_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7619_cast_fp16 = add(x = variance_143_cast_fp16, y = var_7618_to_fp16)[name = string("op_7619_cast_fp16")]; fp32 var_7620_epsilon_0 = const()[name = string("op_7620_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7620_cast_fp16 = rsqrt(epsilon = var_7620_epsilon_0, x = var_7619_cast_fp16)[name = string("op_7620_cast_fp16")]; tensor input_175_cast_fp16 = mul(x = var_7617_cast_fp16, y = var_7620_cast_fp16)[name = string("input_175_cast_fp16")]; tensor layers_17_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273857344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277003136))))[name = string("layers_17_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_gate_proj_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor var_7628_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_7628_cast_fp16")]; tensor layers_17_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277003712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280149504))))[name = string("layers_17_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_17_mlp_up_proj_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor input_179_cast_fp16 = mul(x = var_7628_cast_fp16, y = linear_124_cast_fp16)[name = string("input_179_cast_fp16")]; tensor layers_17_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280150080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283295872))))[name = string("layers_17_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_17_mlp_down_proj_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor var_7635_axes_0 = const()[name = string("op_7635_axes_0"), val = tensor([0])]; tensor var_7635_cast_fp16 = squeeze(axes = var_7635_axes_0, x = linear_125_cast_fp16)[name = string("op_7635_cast_fp16")]; tensor var_7637_axes_0 = const()[name = string("op_7637_axes_0"), val = tensor([0])]; tensor var_7637_cast_fp16 = squeeze(axes = var_7637_axes_0, x = var_7635_cast_fp16)[name = string("op_7637_cast_fp16")]; tensor var_7639_axes_0 = const()[name = string("op_7639_axes_0"), val = tensor([-1])]; tensor var_7639_cast_fp16 = expand_dims(axes = var_7639_axes_0, x = var_7637_cast_fp16)[name = string("op_7639_cast_fp16")]; tensor h_35_axes_0 = const()[name = string("h_35_axes_0"), val = tensor([-1])]; tensor h_35_cast_fp16 = expand_dims(axes = h_35_axes_0, x = var_7639_cast_fp16)[name = string("h_35_cast_fp16")]; tensor hidden_71_cast_fp16 = add(x = hidden_69_cast_fp16, y = h_35_cast_fp16)[name = string("hidden_71_cast_fp16")]; tensor var_7653_begin_0 = const()[name = string("op_7653_begin_0"), val = tensor([0, 18432, 0, 0])]; tensor var_7653_end_0 = const()[name = string("op_7653_end_0"), val = tensor([1, 19456, 1, 256])]; tensor var_7653_end_mask_0 = const()[name = string("op_7653_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7653_cast_fp16 = slice_by_index(begin = var_7653_begin_0, end = var_7653_end_0, end_mask = var_7653_end_mask_0, x = key_cache)[name = string("op_7653_cast_fp16")]; tensor var_7673_begin_0 = const()[name = string("op_7673_begin_0"), val = tensor([0, 18432, 0, 0])]; tensor var_7673_end_0 = const()[name = string("op_7673_end_0"), val = tensor([1, 19456, 1, 256])]; tensor var_7673_end_mask_0 = const()[name = string("op_7673_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7673_cast_fp16 = slice_by_index(begin = var_7673_begin_0, end = var_7673_end_0, end_mask = var_7673_end_mask_0, x = value_cache)[name = string("op_7673_cast_fp16")]; tensor var_7685_axes_0 = const()[name = string("op_7685_axes_0"), val = tensor([-1])]; tensor var_7685_cast_fp16 = squeeze(axes = var_7685_axes_0, x = hidden_71_cast_fp16)[name = string("op_7685_cast_fp16")]; tensor var_7687_axes_0 = const()[name = string("op_7687_axes_0"), val = tensor([-1])]; tensor var_7687_cast_fp16 = squeeze(axes = var_7687_axes_0, x = var_7685_cast_fp16)[name = string("op_7687_cast_fp16")]; tensor hidden_states_289_axes_0 = const()[name = string("hidden_states_289_axes_0"), val = tensor([0])]; tensor hidden_states_289_cast_fp16 = expand_dims(axes = hidden_states_289_axes_0, x = var_7687_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; fp16 var_7693_promoted_to_fp16 = const()[name = string("op_7693_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7699_cast_fp16 = pow(x = hidden_states_289_cast_fp16, y = var_7693_promoted_to_fp16)[name = string("op_7699_cast_fp16")]; tensor variance_145_axes_0 = const()[name = string("variance_145_axes_0"), val = tensor([-1])]; bool variance_145_keep_dims_0 = const()[name = string("variance_145_keep_dims_0"), val = bool(true)]; tensor variance_145_cast_fp16 = reduce_mean(axes = variance_145_axes_0, keep_dims = variance_145_keep_dims_0, x = var_7699_cast_fp16)[name = string("variance_145_cast_fp16")]; tensor const_181_to_fp16 = const()[name = string("const_181_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283296448)))]; tensor var_7703_cast_fp16 = mul(x = const_181_to_fp16, y = hidden_states_289_cast_fp16)[name = string("op_7703_cast_fp16")]; fp16 var_7704_to_fp16 = const()[name = string("op_7704_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7705_cast_fp16 = add(x = variance_145_cast_fp16, y = var_7704_to_fp16)[name = string("op_7705_cast_fp16")]; fp32 var_7706_epsilon_0 = const()[name = string("op_7706_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7706_cast_fp16 = rsqrt(epsilon = var_7706_epsilon_0, x = var_7705_cast_fp16)[name = string("op_7706_cast_fp16")]; tensor input_181_cast_fp16 = mul(x = var_7703_cast_fp16, y = var_7706_cast_fp16)[name = string("input_181_cast_fp16")]; tensor layers_18_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283298560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285395776))))[name = string("layers_18_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_18_self_attn_q_proj_weight_to_fp16_palettized, x = input_181_cast_fp16)[name = string("linear_126_cast_fp16")]; tensor var_7715 = const()[name = string("op_7715"), val = tensor([1, 1, 16, 128])]; tensor var_7716_cast_fp16 = reshape(shape = var_7715, x = linear_126_cast_fp16)[name = string("op_7716_cast_fp16")]; tensor layers_18_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285396352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286444992))))[name = string("layers_18_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_k_proj_weight_to_fp16_palettized, x = input_181_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor var_7727 = const()[name = string("op_7727"), val = tensor([1, 1, 8, 128])]; tensor var_7728_cast_fp16 = reshape(shape = var_7727, x = linear_127_cast_fp16)[name = string("op_7728_cast_fp16")]; tensor layers_18_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286445568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287494208))))[name = string("layers_18_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_v_proj_weight_to_fp16_palettized, x = input_181_cast_fp16)[name = string("linear_128_cast_fp16")]; fp16 var_7747_promoted_to_fp16 = const()[name = string("op_7747_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7753_cast_fp16 = pow(x = var_7716_cast_fp16, y = var_7747_promoted_to_fp16)[name = string("op_7753_cast_fp16")]; bool variance_147_keep_dims_0 = const()[name = string("variance_147_keep_dims_0"), val = bool(true)]; tensor const_354 = const()[name = string("const_354"), val = tensor([3])]; tensor variance_147_cast_fp16 = reduce_mean(axes = const_354, keep_dims = variance_147_keep_dims_0, x = var_7753_cast_fp16)[name = string("variance_147_cast_fp16")]; tensor const_355_to_fp16 = const()[name = string("const_355_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287494784)))]; tensor var_7757_cast_fp16 = mul(x = const_355_to_fp16, y = var_7716_cast_fp16)[name = string("op_7757_cast_fp16")]; fp16 var_7758_to_fp16 = const()[name = string("op_7758_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7759_cast_fp16 = add(x = variance_147_cast_fp16, y = var_7758_to_fp16)[name = string("op_7759_cast_fp16")]; fp32 var_7760_epsilon_0 = const()[name = string("op_7760_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7760_cast_fp16 = rsqrt(epsilon = var_7760_epsilon_0, x = var_7759_cast_fp16)[name = string("op_7760_cast_fp16")]; tensor q_109_cast_fp16 = mul(x = var_7757_cast_fp16, y = var_7760_cast_fp16)[name = string("q_109_cast_fp16")]; fp16 var_7765_promoted_to_fp16 = const()[name = string("op_7765_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7771_cast_fp16 = pow(x = var_7728_cast_fp16, y = var_7765_promoted_to_fp16)[name = string("op_7771_cast_fp16")]; bool variance_149_keep_dims_0 = const()[name = string("variance_149_keep_dims_0"), val = bool(true)]; tensor const_356 = const()[name = string("const_356"), val = tensor([3])]; tensor variance_149_cast_fp16 = reduce_mean(axes = const_356, keep_dims = variance_149_keep_dims_0, x = var_7771_cast_fp16)[name = string("variance_149_cast_fp16")]; tensor const_357_to_fp16 = const()[name = string("const_357_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287495104)))]; tensor var_7775_cast_fp16 = mul(x = const_357_to_fp16, y = var_7728_cast_fp16)[name = string("op_7775_cast_fp16")]; fp16 var_7776_to_fp16 = const()[name = string("op_7776_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7777_cast_fp16 = add(x = variance_149_cast_fp16, y = var_7776_to_fp16)[name = string("op_7777_cast_fp16")]; fp32 var_7778_epsilon_0 = const()[name = string("op_7778_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7778_cast_fp16 = rsqrt(epsilon = var_7778_epsilon_0, x = var_7777_cast_fp16)[name = string("op_7778_cast_fp16")]; tensor k_109_cast_fp16 = mul(x = var_7775_cast_fp16, y = var_7778_cast_fp16)[name = string("k_109_cast_fp16")]; tensor var_7793_cast_fp16 = mul(x = q_109_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7793_cast_fp16")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_109_cast_fp16)[name = string("x1_73_cast_fp16")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_109_cast_fp16)[name = string("x2_73_cast_fp16")]; fp16 const_186_promoted_to_fp16 = const()[name = string("const_186_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7814_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_186_promoted_to_fp16)[name = string("op_7814_cast_fp16")]; int32 var_7816 = const()[name = string("op_7816"), val = int32(-1)]; bool var_7817_interleave_0 = const()[name = string("op_7817_interleave_0"), val = bool(false)]; tensor var_7817_cast_fp16 = concat(axis = var_7816, interleave = var_7817_interleave_0, values = (var_7814_cast_fp16, x1_73_cast_fp16))[name = string("op_7817_cast_fp16")]; tensor var_7818_cast_fp16 = mul(x = var_7817_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7818_cast_fp16")]; tensor q_113_cast_fp16 = add(x = var_7793_cast_fp16, y = var_7818_cast_fp16)[name = string("q_113_cast_fp16")]; tensor var_7821_cast_fp16 = mul(x = k_109_cast_fp16, y = cos_1_cast_fp16)[name = string("op_7821_cast_fp16")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_109_cast_fp16)[name = string("x1_75_cast_fp16")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_109_cast_fp16)[name = string("x2_75_cast_fp16")]; fp16 const_189_promoted_to_fp16 = const()[name = string("const_189_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_7842_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_189_promoted_to_fp16)[name = string("op_7842_cast_fp16")]; int32 var_7844 = const()[name = string("op_7844"), val = int32(-1)]; bool var_7845_interleave_0 = const()[name = string("op_7845_interleave_0"), val = bool(false)]; tensor var_7845_cast_fp16 = concat(axis = var_7844, interleave = var_7845_interleave_0, values = (var_7842_cast_fp16, x1_75_cast_fp16))[name = string("op_7845_cast_fp16")]; tensor var_7846_cast_fp16 = mul(x = var_7845_cast_fp16, y = sin_1_cast_fp16)[name = string("op_7846_cast_fp16")]; tensor k_113_cast_fp16 = add(x = var_7821_cast_fp16, y = var_7846_cast_fp16)[name = string("k_113_cast_fp16")]; tensor var_7853 = const()[name = string("op_7853"), val = tensor([1, 1024, 1, 1])]; tensor nk_37_cast_fp16 = reshape(shape = var_7853, x = k_113_cast_fp16)[name = string("nk_37_cast_fp16")]; tensor var_7859 = const()[name = string("op_7859"), val = tensor([1, 1024, 1, 1])]; tensor nv_37_cast_fp16 = reshape(shape = var_7859, x = linear_128_cast_fp16)[name = string("nv_37_cast_fp16")]; tensor var_7864_cast_fp16 = mul(x = var_7653_cast_fp16, y = var_1203_cast_fp16)[name = string("op_7864_cast_fp16")]; tensor var_7865_cast_fp16 = mul(x = nk_37_cast_fp16, y = update_mask_cast_fp16)[name = string("op_7865_cast_fp16")]; tensor lkc_75_cast_fp16 = add(x = var_7864_cast_fp16, y = var_7865_cast_fp16)[name = string("lkc_75_cast_fp16")]; tensor var_7871_cast_fp16 = mul(x = var_7673_cast_fp16, y = var_1203_cast_fp16)[name = string("op_7871_cast_fp16")]; tensor var_7872_cast_fp16 = mul(x = nv_37_cast_fp16, y = update_mask_cast_fp16)[name = string("op_7872_cast_fp16")]; tensor lvc_75_cast_fp16 = add(x = var_7871_cast_fp16, y = var_7872_cast_fp16)[name = string("lvc_75_cast_fp16")]; tensor var_7876_axes_0 = const()[name = string("op_7876_axes_0"), val = tensor([2])]; tensor var_7876_cast_fp16 = squeeze(axes = var_7876_axes_0, x = lkc_75_cast_fp16)[name = string("op_7876_cast_fp16")]; tensor var_7881 = const()[name = string("op_7881"), val = tensor([1, 8, 128, 256])]; tensor kc_73_cast_fp16 = reshape(shape = var_7881, x = var_7876_cast_fp16)[name = string("kc_73_cast_fp16")]; tensor var_7884_axes_0 = const()[name = string("op_7884_axes_0"), val = tensor([2])]; tensor var_7884_cast_fp16 = squeeze(axes = var_7884_axes_0, x = lvc_75_cast_fp16)[name = string("op_7884_cast_fp16")]; tensor var_7889 = const()[name = string("op_7889"), val = tensor([1, 8, 128, 256])]; tensor vc_73_cast_fp16 = reshape(shape = var_7889, x = var_7884_cast_fp16)[name = string("vc_73_cast_fp16")]; tensor var_7892_axes_0 = const()[name = string("op_7892_axes_0"), val = tensor([2])]; tensor var_7892_cast_fp16 = expand_dims(axes = var_7892_axes_0, x = kc_73_cast_fp16)[name = string("op_7892_cast_fp16")]; tensor var_7900_reps_0 = const()[name = string("op_7900_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7900_cast_fp16 = tile(reps = var_7900_reps_0, x = var_7892_cast_fp16)[name = string("op_7900_cast_fp16")]; tensor var_7905 = const()[name = string("op_7905"), val = tensor([1, 16, 128, 256])]; tensor kc_75_cast_fp16 = reshape(shape = var_7905, x = var_7900_cast_fp16)[name = string("kc_75_cast_fp16")]; tensor var_7908_axes_0 = const()[name = string("op_7908_axes_0"), val = tensor([2])]; tensor var_7908_cast_fp16 = expand_dims(axes = var_7908_axes_0, x = vc_73_cast_fp16)[name = string("op_7908_cast_fp16")]; tensor var_7916_reps_0 = const()[name = string("op_7916_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_7916_cast_fp16 = tile(reps = var_7916_reps_0, x = var_7908_cast_fp16)[name = string("op_7916_cast_fp16")]; tensor var_7921 = const()[name = string("op_7921"), val = tensor([1, 16, 128, 256])]; tensor vc_75_cast_fp16 = reshape(shape = var_7921, x = var_7916_cast_fp16)[name = string("vc_75_cast_fp16")]; tensor var_7925_perm_0 = const()[name = string("op_7925_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_7926_transpose_x_0 = const()[name = string("op_7926_transpose_x_0"), val = bool(false)]; bool var_7926_transpose_y_0 = const()[name = string("op_7926_transpose_y_0"), val = bool(false)]; tensor var_7925_cast_fp16 = transpose(perm = var_7925_perm_0, x = q_113_cast_fp16)[name = string("transpose_19")]; tensor var_7926_cast_fp16 = matmul(transpose_x = var_7926_transpose_x_0, transpose_y = var_7926_transpose_y_0, x = var_7925_cast_fp16, y = kc_75_cast_fp16)[name = string("op_7926_cast_fp16")]; fp16 _inversed_aw_145_y_0_to_fp16 = const()[name = string("_inversed_aw_145_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_145_cast_fp16 = mul(x = var_7926_cast_fp16, y = _inversed_aw_145_y_0_to_fp16)[name = string("_inversed_aw_145_cast_fp16")]; tensor aw_147_cast_fp16 = add(x = _inversed_aw_145_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_147_cast_fp16")]; int32 var_7940 = const()[name = string("op_7940"), val = int32(-1)]; tensor aw_151_cast_fp16 = softmax(axis = var_7940, x = aw_147_cast_fp16)[name = string("aw_151_cast_fp16")]; bool var_7946_transpose_x_1 = const()[name = string("op_7946_transpose_x_1"), val = bool(false)]; bool var_7946_transpose_y_1 = const()[name = string("op_7946_transpose_y_1"), val = bool(true)]; tensor var_7946_cast_fp16 = matmul(transpose_x = var_7946_transpose_x_1, transpose_y = var_7946_transpose_y_1, x = aw_151_cast_fp16, y = vc_75_cast_fp16)[name = string("op_7946_cast_fp16")]; tensor var_7949_perm_0 = const()[name = string("op_7949_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_7953 = const()[name = string("op_7953"), val = tensor([1, 1, -1])]; tensor var_7949_cast_fp16 = transpose(perm = var_7949_perm_0, x = var_7946_cast_fp16)[name = string("transpose_18")]; tensor input_183_cast_fp16 = reshape(shape = var_7953, x = var_7949_cast_fp16)[name = string("input_183_cast_fp16")]; tensor layers_18_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287495424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289592640))))[name = string("layers_18_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_self_attn_o_proj_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor var_7959_axes_0 = const()[name = string("op_7959_axes_0"), val = tensor([0])]; tensor var_7959_cast_fp16 = squeeze(axes = var_7959_axes_0, x = linear_129_cast_fp16)[name = string("op_7959_cast_fp16")]; tensor var_7961_axes_0 = const()[name = string("op_7961_axes_0"), val = tensor([0])]; tensor var_7961_cast_fp16 = squeeze(axes = var_7961_axes_0, x = var_7959_cast_fp16)[name = string("op_7961_cast_fp16")]; tensor var_7963_axes_0 = const()[name = string("op_7963_axes_0"), val = tensor([-1])]; tensor var_7963_cast_fp16 = expand_dims(axes = var_7963_axes_0, x = var_7961_cast_fp16)[name = string("op_7963_cast_fp16")]; tensor ao_37_axes_0 = const()[name = string("ao_37_axes_0"), val = tensor([-1])]; tensor ao_37_cast_fp16 = expand_dims(axes = ao_37_axes_0, x = var_7963_cast_fp16)[name = string("ao_37_cast_fp16")]; tensor hidden_73_cast_fp16 = add(x = hidden_71_cast_fp16, y = ao_37_cast_fp16)[name = string("hidden_73_cast_fp16")]; tensor var_7969_axes_0 = const()[name = string("op_7969_axes_0"), val = tensor([-1])]; tensor var_7969_cast_fp16 = squeeze(axes = var_7969_axes_0, x = hidden_73_cast_fp16)[name = string("op_7969_cast_fp16")]; tensor var_7971_axes_0 = const()[name = string("op_7971_axes_0"), val = tensor([-1])]; tensor var_7971_cast_fp16 = squeeze(axes = var_7971_axes_0, x = var_7969_cast_fp16)[name = string("op_7971_cast_fp16")]; tensor hidden_states_301_axes_0 = const()[name = string("hidden_states_301_axes_0"), val = tensor([0])]; tensor hidden_states_301_cast_fp16 = expand_dims(axes = hidden_states_301_axes_0, x = var_7971_cast_fp16)[name = string("hidden_states_301_cast_fp16")]; fp16 var_7977_promoted_to_fp16 = const()[name = string("op_7977_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_7983_cast_fp16 = pow(x = hidden_states_301_cast_fp16, y = var_7977_promoted_to_fp16)[name = string("op_7983_cast_fp16")]; tensor variance_151_axes_0 = const()[name = string("variance_151_axes_0"), val = tensor([-1])]; bool variance_151_keep_dims_0 = const()[name = string("variance_151_keep_dims_0"), val = bool(true)]; tensor variance_151_cast_fp16 = reduce_mean(axes = variance_151_axes_0, keep_dims = variance_151_keep_dims_0, x = var_7983_cast_fp16)[name = string("variance_151_cast_fp16")]; tensor const_190_to_fp16 = const()[name = string("const_190_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289593216)))]; tensor var_7987_cast_fp16 = mul(x = const_190_to_fp16, y = hidden_states_301_cast_fp16)[name = string("op_7987_cast_fp16")]; fp16 var_7988_to_fp16 = const()[name = string("op_7988_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_7989_cast_fp16 = add(x = variance_151_cast_fp16, y = var_7988_to_fp16)[name = string("op_7989_cast_fp16")]; fp32 var_7990_epsilon_0 = const()[name = string("op_7990_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_7990_cast_fp16 = rsqrt(epsilon = var_7990_epsilon_0, x = var_7989_cast_fp16)[name = string("op_7990_cast_fp16")]; tensor input_185_cast_fp16 = mul(x = var_7987_cast_fp16, y = var_7990_cast_fp16)[name = string("input_185_cast_fp16")]; tensor layers_18_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(289595328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292741120))))[name = string("layers_18_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_gate_proj_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_7998_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_7998_cast_fp16")]; tensor layers_18_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292741696))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295887488))))[name = string("layers_18_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_18_mlp_up_proj_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor input_189_cast_fp16 = mul(x = var_7998_cast_fp16, y = linear_131_cast_fp16)[name = string("input_189_cast_fp16")]; tensor layers_18_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295888064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299033856))))[name = string("layers_18_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_132_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_18_mlp_down_proj_weight_to_fp16_palettized, x = input_189_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor var_8005_axes_0 = const()[name = string("op_8005_axes_0"), val = tensor([0])]; tensor var_8005_cast_fp16 = squeeze(axes = var_8005_axes_0, x = linear_132_cast_fp16)[name = string("op_8005_cast_fp16")]; tensor var_8007_axes_0 = const()[name = string("op_8007_axes_0"), val = tensor([0])]; tensor var_8007_cast_fp16 = squeeze(axes = var_8007_axes_0, x = var_8005_cast_fp16)[name = string("op_8007_cast_fp16")]; tensor var_8009_axes_0 = const()[name = string("op_8009_axes_0"), val = tensor([-1])]; tensor var_8009_cast_fp16 = expand_dims(axes = var_8009_axes_0, x = var_8007_cast_fp16)[name = string("op_8009_cast_fp16")]; tensor h_37_axes_0 = const()[name = string("h_37_axes_0"), val = tensor([-1])]; tensor h_37_cast_fp16 = expand_dims(axes = h_37_axes_0, x = var_8009_cast_fp16)[name = string("h_37_cast_fp16")]; tensor hidden_75_cast_fp16 = add(x = hidden_73_cast_fp16, y = h_37_cast_fp16)[name = string("hidden_75_cast_fp16")]; tensor var_8023_begin_0 = const()[name = string("op_8023_begin_0"), val = tensor([0, 19456, 0, 0])]; tensor var_8023_end_0 = const()[name = string("op_8023_end_0"), val = tensor([1, 20480, 1, 256])]; tensor var_8023_end_mask_0 = const()[name = string("op_8023_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8023_cast_fp16 = slice_by_index(begin = var_8023_begin_0, end = var_8023_end_0, end_mask = var_8023_end_mask_0, x = key_cache)[name = string("op_8023_cast_fp16")]; tensor var_8043_begin_0 = const()[name = string("op_8043_begin_0"), val = tensor([0, 19456, 0, 0])]; tensor var_8043_end_0 = const()[name = string("op_8043_end_0"), val = tensor([1, 20480, 1, 256])]; tensor var_8043_end_mask_0 = const()[name = string("op_8043_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8043_cast_fp16 = slice_by_index(begin = var_8043_begin_0, end = var_8043_end_0, end_mask = var_8043_end_mask_0, x = value_cache)[name = string("op_8043_cast_fp16")]; tensor var_8055_axes_0 = const()[name = string("op_8055_axes_0"), val = tensor([-1])]; tensor var_8055_cast_fp16 = squeeze(axes = var_8055_axes_0, x = hidden_75_cast_fp16)[name = string("op_8055_cast_fp16")]; tensor var_8057_axes_0 = const()[name = string("op_8057_axes_0"), val = tensor([-1])]; tensor var_8057_cast_fp16 = squeeze(axes = var_8057_axes_0, x = var_8055_cast_fp16)[name = string("op_8057_cast_fp16")]; tensor hidden_states_305_axes_0 = const()[name = string("hidden_states_305_axes_0"), val = tensor([0])]; tensor hidden_states_305_cast_fp16 = expand_dims(axes = hidden_states_305_axes_0, x = var_8057_cast_fp16)[name = string("hidden_states_305_cast_fp16")]; fp16 var_8063_promoted_to_fp16 = const()[name = string("op_8063_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8069_cast_fp16 = pow(x = hidden_states_305_cast_fp16, y = var_8063_promoted_to_fp16)[name = string("op_8069_cast_fp16")]; tensor variance_153_axes_0 = const()[name = string("variance_153_axes_0"), val = tensor([-1])]; bool variance_153_keep_dims_0 = const()[name = string("variance_153_keep_dims_0"), val = bool(true)]; tensor variance_153_cast_fp16 = reduce_mean(axes = variance_153_axes_0, keep_dims = variance_153_keep_dims_0, x = var_8069_cast_fp16)[name = string("variance_153_cast_fp16")]; tensor const_191_to_fp16 = const()[name = string("const_191_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299034432)))]; tensor var_8073_cast_fp16 = mul(x = const_191_to_fp16, y = hidden_states_305_cast_fp16)[name = string("op_8073_cast_fp16")]; fp16 var_8074_to_fp16 = const()[name = string("op_8074_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8075_cast_fp16 = add(x = variance_153_cast_fp16, y = var_8074_to_fp16)[name = string("op_8075_cast_fp16")]; fp32 var_8076_epsilon_0 = const()[name = string("op_8076_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8076_cast_fp16 = rsqrt(epsilon = var_8076_epsilon_0, x = var_8075_cast_fp16)[name = string("op_8076_cast_fp16")]; tensor input_191_cast_fp16 = mul(x = var_8073_cast_fp16, y = var_8076_cast_fp16)[name = string("input_191_cast_fp16")]; tensor layers_19_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299036544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301133760))))[name = string("layers_19_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_19_self_attn_q_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor var_8085 = const()[name = string("op_8085"), val = tensor([1, 1, 16, 128])]; tensor var_8086_cast_fp16 = reshape(shape = var_8085, x = linear_133_cast_fp16)[name = string("op_8086_cast_fp16")]; tensor layers_19_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301134336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302182976))))[name = string("layers_19_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_k_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("linear_134_cast_fp16")]; tensor var_8097 = const()[name = string("op_8097"), val = tensor([1, 1, 8, 128])]; tensor var_8098_cast_fp16 = reshape(shape = var_8097, x = linear_134_cast_fp16)[name = string("op_8098_cast_fp16")]; tensor layers_19_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302183552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303232192))))[name = string("layers_19_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_v_proj_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("linear_135_cast_fp16")]; fp16 var_8117_promoted_to_fp16 = const()[name = string("op_8117_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8123_cast_fp16 = pow(x = var_8086_cast_fp16, y = var_8117_promoted_to_fp16)[name = string("op_8123_cast_fp16")]; bool variance_155_keep_dims_0 = const()[name = string("variance_155_keep_dims_0"), val = bool(true)]; tensor const_358 = const()[name = string("const_358"), val = tensor([3])]; tensor variance_155_cast_fp16 = reduce_mean(axes = const_358, keep_dims = variance_155_keep_dims_0, x = var_8123_cast_fp16)[name = string("variance_155_cast_fp16")]; tensor const_359_to_fp16 = const()[name = string("const_359_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303232768)))]; tensor var_8127_cast_fp16 = mul(x = const_359_to_fp16, y = var_8086_cast_fp16)[name = string("op_8127_cast_fp16")]; fp16 var_8128_to_fp16 = const()[name = string("op_8128_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8129_cast_fp16 = add(x = variance_155_cast_fp16, y = var_8128_to_fp16)[name = string("op_8129_cast_fp16")]; fp32 var_8130_epsilon_0 = const()[name = string("op_8130_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8130_cast_fp16 = rsqrt(epsilon = var_8130_epsilon_0, x = var_8129_cast_fp16)[name = string("op_8130_cast_fp16")]; tensor q_115_cast_fp16 = mul(x = var_8127_cast_fp16, y = var_8130_cast_fp16)[name = string("q_115_cast_fp16")]; fp16 var_8135_promoted_to_fp16 = const()[name = string("op_8135_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8141_cast_fp16 = pow(x = var_8098_cast_fp16, y = var_8135_promoted_to_fp16)[name = string("op_8141_cast_fp16")]; bool variance_157_keep_dims_0 = const()[name = string("variance_157_keep_dims_0"), val = bool(true)]; tensor const_360 = const()[name = string("const_360"), val = tensor([3])]; tensor variance_157_cast_fp16 = reduce_mean(axes = const_360, keep_dims = variance_157_keep_dims_0, x = var_8141_cast_fp16)[name = string("variance_157_cast_fp16")]; tensor const_361_to_fp16 = const()[name = string("const_361_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303233088)))]; tensor var_8145_cast_fp16 = mul(x = const_361_to_fp16, y = var_8098_cast_fp16)[name = string("op_8145_cast_fp16")]; fp16 var_8146_to_fp16 = const()[name = string("op_8146_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8147_cast_fp16 = add(x = variance_157_cast_fp16, y = var_8146_to_fp16)[name = string("op_8147_cast_fp16")]; fp32 var_8148_epsilon_0 = const()[name = string("op_8148_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8148_cast_fp16 = rsqrt(epsilon = var_8148_epsilon_0, x = var_8147_cast_fp16)[name = string("op_8148_cast_fp16")]; tensor k_115_cast_fp16 = mul(x = var_8145_cast_fp16, y = var_8148_cast_fp16)[name = string("k_115_cast_fp16")]; tensor var_8163_cast_fp16 = mul(x = q_115_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8163_cast_fp16")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_115_cast_fp16)[name = string("x1_77_cast_fp16")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_115_cast_fp16)[name = string("x2_77_cast_fp16")]; fp16 const_196_promoted_to_fp16 = const()[name = string("const_196_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8184_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_196_promoted_to_fp16)[name = string("op_8184_cast_fp16")]; int32 var_8186 = const()[name = string("op_8186"), val = int32(-1)]; bool var_8187_interleave_0 = const()[name = string("op_8187_interleave_0"), val = bool(false)]; tensor var_8187_cast_fp16 = concat(axis = var_8186, interleave = var_8187_interleave_0, values = (var_8184_cast_fp16, x1_77_cast_fp16))[name = string("op_8187_cast_fp16")]; tensor var_8188_cast_fp16 = mul(x = var_8187_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8188_cast_fp16")]; tensor q_119_cast_fp16 = add(x = var_8163_cast_fp16, y = var_8188_cast_fp16)[name = string("q_119_cast_fp16")]; tensor var_8191_cast_fp16 = mul(x = k_115_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8191_cast_fp16")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_115_cast_fp16)[name = string("x1_79_cast_fp16")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_115_cast_fp16)[name = string("x2_79_cast_fp16")]; fp16 const_199_promoted_to_fp16 = const()[name = string("const_199_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8212_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_199_promoted_to_fp16)[name = string("op_8212_cast_fp16")]; int32 var_8214 = const()[name = string("op_8214"), val = int32(-1)]; bool var_8215_interleave_0 = const()[name = string("op_8215_interleave_0"), val = bool(false)]; tensor var_8215_cast_fp16 = concat(axis = var_8214, interleave = var_8215_interleave_0, values = (var_8212_cast_fp16, x1_79_cast_fp16))[name = string("op_8215_cast_fp16")]; tensor var_8216_cast_fp16 = mul(x = var_8215_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8216_cast_fp16")]; tensor k_119_cast_fp16 = add(x = var_8191_cast_fp16, y = var_8216_cast_fp16)[name = string("k_119_cast_fp16")]; tensor var_8223 = const()[name = string("op_8223"), val = tensor([1, 1024, 1, 1])]; tensor nk_39_cast_fp16 = reshape(shape = var_8223, x = k_119_cast_fp16)[name = string("nk_39_cast_fp16")]; tensor var_8229 = const()[name = string("op_8229"), val = tensor([1, 1024, 1, 1])]; tensor nv_39_cast_fp16 = reshape(shape = var_8229, x = linear_135_cast_fp16)[name = string("nv_39_cast_fp16")]; tensor var_8234_cast_fp16 = mul(x = var_8023_cast_fp16, y = var_1203_cast_fp16)[name = string("op_8234_cast_fp16")]; tensor var_8235_cast_fp16 = mul(x = nk_39_cast_fp16, y = update_mask_cast_fp16)[name = string("op_8235_cast_fp16")]; tensor lkc_79_cast_fp16 = add(x = var_8234_cast_fp16, y = var_8235_cast_fp16)[name = string("lkc_79_cast_fp16")]; tensor var_8241_cast_fp16 = mul(x = var_8043_cast_fp16, y = var_1203_cast_fp16)[name = string("op_8241_cast_fp16")]; tensor var_8242_cast_fp16 = mul(x = nv_39_cast_fp16, y = update_mask_cast_fp16)[name = string("op_8242_cast_fp16")]; tensor lvc_79_cast_fp16 = add(x = var_8241_cast_fp16, y = var_8242_cast_fp16)[name = string("lvc_79_cast_fp16")]; tensor var_8246_axes_0 = const()[name = string("op_8246_axes_0"), val = tensor([2])]; tensor var_8246_cast_fp16 = squeeze(axes = var_8246_axes_0, x = lkc_79_cast_fp16)[name = string("op_8246_cast_fp16")]; tensor var_8251 = const()[name = string("op_8251"), val = tensor([1, 8, 128, 256])]; tensor kc_77_cast_fp16 = reshape(shape = var_8251, x = var_8246_cast_fp16)[name = string("kc_77_cast_fp16")]; tensor var_8254_axes_0 = const()[name = string("op_8254_axes_0"), val = tensor([2])]; tensor var_8254_cast_fp16 = squeeze(axes = var_8254_axes_0, x = lvc_79_cast_fp16)[name = string("op_8254_cast_fp16")]; tensor var_8259 = const()[name = string("op_8259"), val = tensor([1, 8, 128, 256])]; tensor vc_77_cast_fp16 = reshape(shape = var_8259, x = var_8254_cast_fp16)[name = string("vc_77_cast_fp16")]; tensor var_8262_axes_0 = const()[name = string("op_8262_axes_0"), val = tensor([2])]; tensor var_8262_cast_fp16 = expand_dims(axes = var_8262_axes_0, x = kc_77_cast_fp16)[name = string("op_8262_cast_fp16")]; tensor var_8270_reps_0 = const()[name = string("op_8270_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8270_cast_fp16 = tile(reps = var_8270_reps_0, x = var_8262_cast_fp16)[name = string("op_8270_cast_fp16")]; tensor var_8275 = const()[name = string("op_8275"), val = tensor([1, 16, 128, 256])]; tensor kc_79_cast_fp16 = reshape(shape = var_8275, x = var_8270_cast_fp16)[name = string("kc_79_cast_fp16")]; tensor var_8278_axes_0 = const()[name = string("op_8278_axes_0"), val = tensor([2])]; tensor var_8278_cast_fp16 = expand_dims(axes = var_8278_axes_0, x = vc_77_cast_fp16)[name = string("op_8278_cast_fp16")]; tensor var_8286_reps_0 = const()[name = string("op_8286_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8286_cast_fp16 = tile(reps = var_8286_reps_0, x = var_8278_cast_fp16)[name = string("op_8286_cast_fp16")]; tensor var_8291 = const()[name = string("op_8291"), val = tensor([1, 16, 128, 256])]; tensor vc_79_cast_fp16 = reshape(shape = var_8291, x = var_8286_cast_fp16)[name = string("vc_79_cast_fp16")]; tensor var_8295_perm_0 = const()[name = string("op_8295_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_8296_transpose_x_0 = const()[name = string("op_8296_transpose_x_0"), val = bool(false)]; bool var_8296_transpose_y_0 = const()[name = string("op_8296_transpose_y_0"), val = bool(false)]; tensor var_8295_cast_fp16 = transpose(perm = var_8295_perm_0, x = q_119_cast_fp16)[name = string("transpose_17")]; tensor var_8296_cast_fp16 = matmul(transpose_x = var_8296_transpose_x_0, transpose_y = var_8296_transpose_y_0, x = var_8295_cast_fp16, y = kc_79_cast_fp16)[name = string("op_8296_cast_fp16")]; fp16 _inversed_aw_153_y_0_to_fp16 = const()[name = string("_inversed_aw_153_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_153_cast_fp16 = mul(x = var_8296_cast_fp16, y = _inversed_aw_153_y_0_to_fp16)[name = string("_inversed_aw_153_cast_fp16")]; tensor aw_155_cast_fp16 = add(x = _inversed_aw_153_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_155_cast_fp16")]; int32 var_8310 = const()[name = string("op_8310"), val = int32(-1)]; tensor aw_159_cast_fp16 = softmax(axis = var_8310, x = aw_155_cast_fp16)[name = string("aw_159_cast_fp16")]; bool var_8316_transpose_x_1 = const()[name = string("op_8316_transpose_x_1"), val = bool(false)]; bool var_8316_transpose_y_1 = const()[name = string("op_8316_transpose_y_1"), val = bool(true)]; tensor var_8316_cast_fp16 = matmul(transpose_x = var_8316_transpose_x_1, transpose_y = var_8316_transpose_y_1, x = aw_159_cast_fp16, y = vc_79_cast_fp16)[name = string("op_8316_cast_fp16")]; tensor var_8319_perm_0 = const()[name = string("op_8319_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8323 = const()[name = string("op_8323"), val = tensor([1, 1, -1])]; tensor var_8319_cast_fp16 = transpose(perm = var_8319_perm_0, x = var_8316_cast_fp16)[name = string("transpose_16")]; tensor input_193_cast_fp16 = reshape(shape = var_8323, x = var_8319_cast_fp16)[name = string("input_193_cast_fp16")]; tensor layers_19_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303233408))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305330624))))[name = string("layers_19_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_self_attn_o_proj_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor var_8329_axes_0 = const()[name = string("op_8329_axes_0"), val = tensor([0])]; tensor var_8329_cast_fp16 = squeeze(axes = var_8329_axes_0, x = linear_136_cast_fp16)[name = string("op_8329_cast_fp16")]; tensor var_8331_axes_0 = const()[name = string("op_8331_axes_0"), val = tensor([0])]; tensor var_8331_cast_fp16 = squeeze(axes = var_8331_axes_0, x = var_8329_cast_fp16)[name = string("op_8331_cast_fp16")]; tensor var_8333_axes_0 = const()[name = string("op_8333_axes_0"), val = tensor([-1])]; tensor var_8333_cast_fp16 = expand_dims(axes = var_8333_axes_0, x = var_8331_cast_fp16)[name = string("op_8333_cast_fp16")]; tensor ao_39_axes_0 = const()[name = string("ao_39_axes_0"), val = tensor([-1])]; tensor ao_39_cast_fp16 = expand_dims(axes = ao_39_axes_0, x = var_8333_cast_fp16)[name = string("ao_39_cast_fp16")]; tensor hidden_77_cast_fp16 = add(x = hidden_75_cast_fp16, y = ao_39_cast_fp16)[name = string("hidden_77_cast_fp16")]; tensor var_8339_axes_0 = const()[name = string("op_8339_axes_0"), val = tensor([-1])]; tensor var_8339_cast_fp16 = squeeze(axes = var_8339_axes_0, x = hidden_77_cast_fp16)[name = string("op_8339_cast_fp16")]; tensor var_8341_axes_0 = const()[name = string("op_8341_axes_0"), val = tensor([-1])]; tensor var_8341_cast_fp16 = squeeze(axes = var_8341_axes_0, x = var_8339_cast_fp16)[name = string("op_8341_cast_fp16")]; tensor hidden_states_317_axes_0 = const()[name = string("hidden_states_317_axes_0"), val = tensor([0])]; tensor hidden_states_317_cast_fp16 = expand_dims(axes = hidden_states_317_axes_0, x = var_8341_cast_fp16)[name = string("hidden_states_317_cast_fp16")]; fp16 var_8347_promoted_to_fp16 = const()[name = string("op_8347_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8353_cast_fp16 = pow(x = hidden_states_317_cast_fp16, y = var_8347_promoted_to_fp16)[name = string("op_8353_cast_fp16")]; tensor variance_159_axes_0 = const()[name = string("variance_159_axes_0"), val = tensor([-1])]; bool variance_159_keep_dims_0 = const()[name = string("variance_159_keep_dims_0"), val = bool(true)]; tensor variance_159_cast_fp16 = reduce_mean(axes = variance_159_axes_0, keep_dims = variance_159_keep_dims_0, x = var_8353_cast_fp16)[name = string("variance_159_cast_fp16")]; tensor const_200_to_fp16 = const()[name = string("const_200_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305331200)))]; tensor var_8357_cast_fp16 = mul(x = const_200_to_fp16, y = hidden_states_317_cast_fp16)[name = string("op_8357_cast_fp16")]; fp16 var_8358_to_fp16 = const()[name = string("op_8358_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8359_cast_fp16 = add(x = variance_159_cast_fp16, y = var_8358_to_fp16)[name = string("op_8359_cast_fp16")]; fp32 var_8360_epsilon_0 = const()[name = string("op_8360_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8360_cast_fp16 = rsqrt(epsilon = var_8360_epsilon_0, x = var_8359_cast_fp16)[name = string("op_8360_cast_fp16")]; tensor input_195_cast_fp16 = mul(x = var_8357_cast_fp16, y = var_8360_cast_fp16)[name = string("input_195_cast_fp16")]; tensor layers_19_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305333312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308479104))))[name = string("layers_19_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_gate_proj_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_8368_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_8368_cast_fp16")]; tensor layers_19_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308479680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311625472))))[name = string("layers_19_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_19_mlp_up_proj_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor input_199_cast_fp16 = mul(x = var_8368_cast_fp16, y = linear_138_cast_fp16)[name = string("input_199_cast_fp16")]; tensor layers_19_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311626048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314771840))))[name = string("layers_19_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_19_mlp_down_proj_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor var_8375_axes_0 = const()[name = string("op_8375_axes_0"), val = tensor([0])]; tensor var_8375_cast_fp16 = squeeze(axes = var_8375_axes_0, x = linear_139_cast_fp16)[name = string("op_8375_cast_fp16")]; tensor var_8377_axes_0 = const()[name = string("op_8377_axes_0"), val = tensor([0])]; tensor var_8377_cast_fp16 = squeeze(axes = var_8377_axes_0, x = var_8375_cast_fp16)[name = string("op_8377_cast_fp16")]; tensor var_8379_axes_0 = const()[name = string("op_8379_axes_0"), val = tensor([-1])]; tensor var_8379_cast_fp16 = expand_dims(axes = var_8379_axes_0, x = var_8377_cast_fp16)[name = string("op_8379_cast_fp16")]; tensor h_39_axes_0 = const()[name = string("h_39_axes_0"), val = tensor([-1])]; tensor h_39_cast_fp16 = expand_dims(axes = h_39_axes_0, x = var_8379_cast_fp16)[name = string("h_39_cast_fp16")]; tensor hidden_79_cast_fp16 = add(x = hidden_77_cast_fp16, y = h_39_cast_fp16)[name = string("hidden_79_cast_fp16")]; tensor var_8393_begin_0 = const()[name = string("op_8393_begin_0"), val = tensor([0, 20480, 0, 0])]; tensor var_8393_end_0 = const()[name = string("op_8393_end_0"), val = tensor([1, 21504, 1, 256])]; tensor var_8393_end_mask_0 = const()[name = string("op_8393_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8393_cast_fp16 = slice_by_index(begin = var_8393_begin_0, end = var_8393_end_0, end_mask = var_8393_end_mask_0, x = key_cache)[name = string("op_8393_cast_fp16")]; tensor var_8413_begin_0 = const()[name = string("op_8413_begin_0"), val = tensor([0, 20480, 0, 0])]; tensor var_8413_end_0 = const()[name = string("op_8413_end_0"), val = tensor([1, 21504, 1, 256])]; tensor var_8413_end_mask_0 = const()[name = string("op_8413_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8413_cast_fp16 = slice_by_index(begin = var_8413_begin_0, end = var_8413_end_0, end_mask = var_8413_end_mask_0, x = value_cache)[name = string("op_8413_cast_fp16")]; tensor var_8425_axes_0 = const()[name = string("op_8425_axes_0"), val = tensor([-1])]; tensor var_8425_cast_fp16 = squeeze(axes = var_8425_axes_0, x = hidden_79_cast_fp16)[name = string("op_8425_cast_fp16")]; tensor var_8427_axes_0 = const()[name = string("op_8427_axes_0"), val = tensor([-1])]; tensor var_8427_cast_fp16 = squeeze(axes = var_8427_axes_0, x = var_8425_cast_fp16)[name = string("op_8427_cast_fp16")]; tensor hidden_states_321_axes_0 = const()[name = string("hidden_states_321_axes_0"), val = tensor([0])]; tensor hidden_states_321_cast_fp16 = expand_dims(axes = hidden_states_321_axes_0, x = var_8427_cast_fp16)[name = string("hidden_states_321_cast_fp16")]; fp16 var_8433_promoted_to_fp16 = const()[name = string("op_8433_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8439_cast_fp16 = pow(x = hidden_states_321_cast_fp16, y = var_8433_promoted_to_fp16)[name = string("op_8439_cast_fp16")]; tensor variance_161_axes_0 = const()[name = string("variance_161_axes_0"), val = tensor([-1])]; bool variance_161_keep_dims_0 = const()[name = string("variance_161_keep_dims_0"), val = bool(true)]; tensor variance_161_cast_fp16 = reduce_mean(axes = variance_161_axes_0, keep_dims = variance_161_keep_dims_0, x = var_8439_cast_fp16)[name = string("variance_161_cast_fp16")]; tensor const_201_to_fp16 = const()[name = string("const_201_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314772416)))]; tensor var_8443_cast_fp16 = mul(x = const_201_to_fp16, y = hidden_states_321_cast_fp16)[name = string("op_8443_cast_fp16")]; fp16 var_8444_to_fp16 = const()[name = string("op_8444_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8445_cast_fp16 = add(x = variance_161_cast_fp16, y = var_8444_to_fp16)[name = string("op_8445_cast_fp16")]; fp32 var_8446_epsilon_0 = const()[name = string("op_8446_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8446_cast_fp16 = rsqrt(epsilon = var_8446_epsilon_0, x = var_8445_cast_fp16)[name = string("op_8446_cast_fp16")]; tensor input_201_cast_fp16 = mul(x = var_8443_cast_fp16, y = var_8446_cast_fp16)[name = string("input_201_cast_fp16")]; tensor layers_20_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314774528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316871744))))[name = string("layers_20_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_20_self_attn_q_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor var_8455 = const()[name = string("op_8455"), val = tensor([1, 1, 16, 128])]; tensor var_8456_cast_fp16 = reshape(shape = var_8455, x = linear_140_cast_fp16)[name = string("op_8456_cast_fp16")]; tensor layers_20_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316872320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317920960))))[name = string("layers_20_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_k_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor var_8467 = const()[name = string("op_8467"), val = tensor([1, 1, 8, 128])]; tensor var_8468_cast_fp16 = reshape(shape = var_8467, x = linear_141_cast_fp16)[name = string("op_8468_cast_fp16")]; tensor layers_20_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317921536))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318970176))))[name = string("layers_20_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_v_proj_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("linear_142_cast_fp16")]; fp16 var_8487_promoted_to_fp16 = const()[name = string("op_8487_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8493_cast_fp16 = pow(x = var_8456_cast_fp16, y = var_8487_promoted_to_fp16)[name = string("op_8493_cast_fp16")]; bool variance_163_keep_dims_0 = const()[name = string("variance_163_keep_dims_0"), val = bool(true)]; tensor const_362 = const()[name = string("const_362"), val = tensor([3])]; tensor variance_163_cast_fp16 = reduce_mean(axes = const_362, keep_dims = variance_163_keep_dims_0, x = var_8493_cast_fp16)[name = string("variance_163_cast_fp16")]; tensor const_363_to_fp16 = const()[name = string("const_363_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318970752)))]; tensor var_8497_cast_fp16 = mul(x = const_363_to_fp16, y = var_8456_cast_fp16)[name = string("op_8497_cast_fp16")]; fp16 var_8498_to_fp16 = const()[name = string("op_8498_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8499_cast_fp16 = add(x = variance_163_cast_fp16, y = var_8498_to_fp16)[name = string("op_8499_cast_fp16")]; fp32 var_8500_epsilon_0 = const()[name = string("op_8500_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8500_cast_fp16 = rsqrt(epsilon = var_8500_epsilon_0, x = var_8499_cast_fp16)[name = string("op_8500_cast_fp16")]; tensor q_121_cast_fp16 = mul(x = var_8497_cast_fp16, y = var_8500_cast_fp16)[name = string("q_121_cast_fp16")]; fp16 var_8505_promoted_to_fp16 = const()[name = string("op_8505_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8511_cast_fp16 = pow(x = var_8468_cast_fp16, y = var_8505_promoted_to_fp16)[name = string("op_8511_cast_fp16")]; bool variance_165_keep_dims_0 = const()[name = string("variance_165_keep_dims_0"), val = bool(true)]; tensor const_364 = const()[name = string("const_364"), val = tensor([3])]; tensor variance_165_cast_fp16 = reduce_mean(axes = const_364, keep_dims = variance_165_keep_dims_0, x = var_8511_cast_fp16)[name = string("variance_165_cast_fp16")]; tensor const_365_to_fp16 = const()[name = string("const_365_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318971072)))]; tensor var_8515_cast_fp16 = mul(x = const_365_to_fp16, y = var_8468_cast_fp16)[name = string("op_8515_cast_fp16")]; fp16 var_8516_to_fp16 = const()[name = string("op_8516_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8517_cast_fp16 = add(x = variance_165_cast_fp16, y = var_8516_to_fp16)[name = string("op_8517_cast_fp16")]; fp32 var_8518_epsilon_0 = const()[name = string("op_8518_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8518_cast_fp16 = rsqrt(epsilon = var_8518_epsilon_0, x = var_8517_cast_fp16)[name = string("op_8518_cast_fp16")]; tensor k_121_cast_fp16 = mul(x = var_8515_cast_fp16, y = var_8518_cast_fp16)[name = string("k_121_cast_fp16")]; tensor var_8533_cast_fp16 = mul(x = q_121_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8533_cast_fp16")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_121_cast_fp16)[name = string("x1_81_cast_fp16")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_121_cast_fp16)[name = string("x2_81_cast_fp16")]; fp16 const_206_promoted_to_fp16 = const()[name = string("const_206_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8554_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_206_promoted_to_fp16)[name = string("op_8554_cast_fp16")]; int32 var_8556 = const()[name = string("op_8556"), val = int32(-1)]; bool var_8557_interleave_0 = const()[name = string("op_8557_interleave_0"), val = bool(false)]; tensor var_8557_cast_fp16 = concat(axis = var_8556, interleave = var_8557_interleave_0, values = (var_8554_cast_fp16, x1_81_cast_fp16))[name = string("op_8557_cast_fp16")]; tensor var_8558_cast_fp16 = mul(x = var_8557_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8558_cast_fp16")]; tensor q_125_cast_fp16 = add(x = var_8533_cast_fp16, y = var_8558_cast_fp16)[name = string("q_125_cast_fp16")]; tensor var_8561_cast_fp16 = mul(x = k_121_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8561_cast_fp16")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_121_cast_fp16)[name = string("x1_83_cast_fp16")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_121_cast_fp16)[name = string("x2_83_cast_fp16")]; fp16 const_209_promoted_to_fp16 = const()[name = string("const_209_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8582_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_209_promoted_to_fp16)[name = string("op_8582_cast_fp16")]; int32 var_8584 = const()[name = string("op_8584"), val = int32(-1)]; bool var_8585_interleave_0 = const()[name = string("op_8585_interleave_0"), val = bool(false)]; tensor var_8585_cast_fp16 = concat(axis = var_8584, interleave = var_8585_interleave_0, values = (var_8582_cast_fp16, x1_83_cast_fp16))[name = string("op_8585_cast_fp16")]; tensor var_8586_cast_fp16 = mul(x = var_8585_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8586_cast_fp16")]; tensor k_125_cast_fp16 = add(x = var_8561_cast_fp16, y = var_8586_cast_fp16)[name = string("k_125_cast_fp16")]; tensor var_8593 = const()[name = string("op_8593"), val = tensor([1, 1024, 1, 1])]; tensor nk_41_cast_fp16 = reshape(shape = var_8593, x = k_125_cast_fp16)[name = string("nk_41_cast_fp16")]; tensor var_8599 = const()[name = string("op_8599"), val = tensor([1, 1024, 1, 1])]; tensor nv_41_cast_fp16 = reshape(shape = var_8599, x = linear_142_cast_fp16)[name = string("nv_41_cast_fp16")]; tensor var_8604_cast_fp16 = mul(x = var_8393_cast_fp16, y = var_1203_cast_fp16)[name = string("op_8604_cast_fp16")]; tensor var_8605_cast_fp16 = mul(x = nk_41_cast_fp16, y = update_mask_cast_fp16)[name = string("op_8605_cast_fp16")]; tensor lkc_83_cast_fp16 = add(x = var_8604_cast_fp16, y = var_8605_cast_fp16)[name = string("lkc_83_cast_fp16")]; tensor var_8611_cast_fp16 = mul(x = var_8413_cast_fp16, y = var_1203_cast_fp16)[name = string("op_8611_cast_fp16")]; tensor var_8612_cast_fp16 = mul(x = nv_41_cast_fp16, y = update_mask_cast_fp16)[name = string("op_8612_cast_fp16")]; tensor lvc_83_cast_fp16 = add(x = var_8611_cast_fp16, y = var_8612_cast_fp16)[name = string("lvc_83_cast_fp16")]; tensor var_8616_axes_0 = const()[name = string("op_8616_axes_0"), val = tensor([2])]; tensor var_8616_cast_fp16 = squeeze(axes = var_8616_axes_0, x = lkc_83_cast_fp16)[name = string("op_8616_cast_fp16")]; tensor var_8621 = const()[name = string("op_8621"), val = tensor([1, 8, 128, 256])]; tensor kc_81_cast_fp16 = reshape(shape = var_8621, x = var_8616_cast_fp16)[name = string("kc_81_cast_fp16")]; tensor var_8624_axes_0 = const()[name = string("op_8624_axes_0"), val = tensor([2])]; tensor var_8624_cast_fp16 = squeeze(axes = var_8624_axes_0, x = lvc_83_cast_fp16)[name = string("op_8624_cast_fp16")]; tensor var_8629 = const()[name = string("op_8629"), val = tensor([1, 8, 128, 256])]; tensor vc_81_cast_fp16 = reshape(shape = var_8629, x = var_8624_cast_fp16)[name = string("vc_81_cast_fp16")]; tensor var_8632_axes_0 = const()[name = string("op_8632_axes_0"), val = tensor([2])]; tensor var_8632_cast_fp16 = expand_dims(axes = var_8632_axes_0, x = kc_81_cast_fp16)[name = string("op_8632_cast_fp16")]; tensor var_8640_reps_0 = const()[name = string("op_8640_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8640_cast_fp16 = tile(reps = var_8640_reps_0, x = var_8632_cast_fp16)[name = string("op_8640_cast_fp16")]; tensor var_8645 = const()[name = string("op_8645"), val = tensor([1, 16, 128, 256])]; tensor kc_83_cast_fp16 = reshape(shape = var_8645, x = var_8640_cast_fp16)[name = string("kc_83_cast_fp16")]; tensor var_8648_axes_0 = const()[name = string("op_8648_axes_0"), val = tensor([2])]; tensor var_8648_cast_fp16 = expand_dims(axes = var_8648_axes_0, x = vc_81_cast_fp16)[name = string("op_8648_cast_fp16")]; tensor var_8656_reps_0 = const()[name = string("op_8656_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_8656_cast_fp16 = tile(reps = var_8656_reps_0, x = var_8648_cast_fp16)[name = string("op_8656_cast_fp16")]; tensor var_8661 = const()[name = string("op_8661"), val = tensor([1, 16, 128, 256])]; tensor vc_83_cast_fp16 = reshape(shape = var_8661, x = var_8656_cast_fp16)[name = string("vc_83_cast_fp16")]; tensor var_8665_perm_0 = const()[name = string("op_8665_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_8666_transpose_x_0 = const()[name = string("op_8666_transpose_x_0"), val = bool(false)]; bool var_8666_transpose_y_0 = const()[name = string("op_8666_transpose_y_0"), val = bool(false)]; tensor var_8665_cast_fp16 = transpose(perm = var_8665_perm_0, x = q_125_cast_fp16)[name = string("transpose_15")]; tensor var_8666_cast_fp16 = matmul(transpose_x = var_8666_transpose_x_0, transpose_y = var_8666_transpose_y_0, x = var_8665_cast_fp16, y = kc_83_cast_fp16)[name = string("op_8666_cast_fp16")]; fp16 _inversed_aw_161_y_0_to_fp16 = const()[name = string("_inversed_aw_161_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_161_cast_fp16 = mul(x = var_8666_cast_fp16, y = _inversed_aw_161_y_0_to_fp16)[name = string("_inversed_aw_161_cast_fp16")]; tensor aw_163_cast_fp16 = add(x = _inversed_aw_161_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_163_cast_fp16")]; int32 var_8680 = const()[name = string("op_8680"), val = int32(-1)]; tensor aw_167_cast_fp16 = softmax(axis = var_8680, x = aw_163_cast_fp16)[name = string("aw_167_cast_fp16")]; bool var_8686_transpose_x_1 = const()[name = string("op_8686_transpose_x_1"), val = bool(false)]; bool var_8686_transpose_y_1 = const()[name = string("op_8686_transpose_y_1"), val = bool(true)]; tensor var_8686_cast_fp16 = matmul(transpose_x = var_8686_transpose_x_1, transpose_y = var_8686_transpose_y_1, x = aw_167_cast_fp16, y = vc_83_cast_fp16)[name = string("op_8686_cast_fp16")]; tensor var_8689_perm_0 = const()[name = string("op_8689_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_8693 = const()[name = string("op_8693"), val = tensor([1, 1, -1])]; tensor var_8689_cast_fp16 = transpose(perm = var_8689_perm_0, x = var_8686_cast_fp16)[name = string("transpose_14")]; tensor input_203_cast_fp16 = reshape(shape = var_8693, x = var_8689_cast_fp16)[name = string("input_203_cast_fp16")]; tensor layers_20_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318971392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321068608))))[name = string("layers_20_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_self_attn_o_proj_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor var_8699_axes_0 = const()[name = string("op_8699_axes_0"), val = tensor([0])]; tensor var_8699_cast_fp16 = squeeze(axes = var_8699_axes_0, x = linear_143_cast_fp16)[name = string("op_8699_cast_fp16")]; tensor var_8701_axes_0 = const()[name = string("op_8701_axes_0"), val = tensor([0])]; tensor var_8701_cast_fp16 = squeeze(axes = var_8701_axes_0, x = var_8699_cast_fp16)[name = string("op_8701_cast_fp16")]; tensor var_8703_axes_0 = const()[name = string("op_8703_axes_0"), val = tensor([-1])]; tensor var_8703_cast_fp16 = expand_dims(axes = var_8703_axes_0, x = var_8701_cast_fp16)[name = string("op_8703_cast_fp16")]; tensor ao_41_axes_0 = const()[name = string("ao_41_axes_0"), val = tensor([-1])]; tensor ao_41_cast_fp16 = expand_dims(axes = ao_41_axes_0, x = var_8703_cast_fp16)[name = string("ao_41_cast_fp16")]; tensor hidden_81_cast_fp16 = add(x = hidden_79_cast_fp16, y = ao_41_cast_fp16)[name = string("hidden_81_cast_fp16")]; tensor var_8709_axes_0 = const()[name = string("op_8709_axes_0"), val = tensor([-1])]; tensor var_8709_cast_fp16 = squeeze(axes = var_8709_axes_0, x = hidden_81_cast_fp16)[name = string("op_8709_cast_fp16")]; tensor var_8711_axes_0 = const()[name = string("op_8711_axes_0"), val = tensor([-1])]; tensor var_8711_cast_fp16 = squeeze(axes = var_8711_axes_0, x = var_8709_cast_fp16)[name = string("op_8711_cast_fp16")]; tensor hidden_states_333_axes_0 = const()[name = string("hidden_states_333_axes_0"), val = tensor([0])]; tensor hidden_states_333_cast_fp16 = expand_dims(axes = hidden_states_333_axes_0, x = var_8711_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; fp16 var_8717_promoted_to_fp16 = const()[name = string("op_8717_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8723_cast_fp16 = pow(x = hidden_states_333_cast_fp16, y = var_8717_promoted_to_fp16)[name = string("op_8723_cast_fp16")]; tensor variance_167_axes_0 = const()[name = string("variance_167_axes_0"), val = tensor([-1])]; bool variance_167_keep_dims_0 = const()[name = string("variance_167_keep_dims_0"), val = bool(true)]; tensor variance_167_cast_fp16 = reduce_mean(axes = variance_167_axes_0, keep_dims = variance_167_keep_dims_0, x = var_8723_cast_fp16)[name = string("variance_167_cast_fp16")]; tensor const_210_to_fp16 = const()[name = string("const_210_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321069184)))]; tensor var_8727_cast_fp16 = mul(x = const_210_to_fp16, y = hidden_states_333_cast_fp16)[name = string("op_8727_cast_fp16")]; fp16 var_8728_to_fp16 = const()[name = string("op_8728_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8729_cast_fp16 = add(x = variance_167_cast_fp16, y = var_8728_to_fp16)[name = string("op_8729_cast_fp16")]; fp32 var_8730_epsilon_0 = const()[name = string("op_8730_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8730_cast_fp16 = rsqrt(epsilon = var_8730_epsilon_0, x = var_8729_cast_fp16)[name = string("op_8730_cast_fp16")]; tensor input_205_cast_fp16 = mul(x = var_8727_cast_fp16, y = var_8730_cast_fp16)[name = string("input_205_cast_fp16")]; tensor layers_20_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321071296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324217088))))[name = string("layers_20_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_gate_proj_weight_to_fp16_palettized, x = input_205_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_8738_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_8738_cast_fp16")]; tensor layers_20_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324217664))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327363456))))[name = string("layers_20_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_20_mlp_up_proj_weight_to_fp16_palettized, x = input_205_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor input_209_cast_fp16 = mul(x = var_8738_cast_fp16, y = linear_145_cast_fp16)[name = string("input_209_cast_fp16")]; tensor layers_20_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327364032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330509824))))[name = string("layers_20_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_146_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_20_mlp_down_proj_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor var_8745_axes_0 = const()[name = string("op_8745_axes_0"), val = tensor([0])]; tensor var_8745_cast_fp16 = squeeze(axes = var_8745_axes_0, x = linear_146_cast_fp16)[name = string("op_8745_cast_fp16")]; tensor var_8747_axes_0 = const()[name = string("op_8747_axes_0"), val = tensor([0])]; tensor var_8747_cast_fp16 = squeeze(axes = var_8747_axes_0, x = var_8745_cast_fp16)[name = string("op_8747_cast_fp16")]; tensor var_8749_axes_0 = const()[name = string("op_8749_axes_0"), val = tensor([-1])]; tensor var_8749_cast_fp16 = expand_dims(axes = var_8749_axes_0, x = var_8747_cast_fp16)[name = string("op_8749_cast_fp16")]; tensor h_41_axes_0 = const()[name = string("h_41_axes_0"), val = tensor([-1])]; tensor h_41_cast_fp16 = expand_dims(axes = h_41_axes_0, x = var_8749_cast_fp16)[name = string("h_41_cast_fp16")]; tensor hidden_83_cast_fp16 = add(x = hidden_81_cast_fp16, y = h_41_cast_fp16)[name = string("hidden_83_cast_fp16")]; tensor var_8763_begin_0 = const()[name = string("op_8763_begin_0"), val = tensor([0, 21504, 0, 0])]; tensor var_8763_end_0 = const()[name = string("op_8763_end_0"), val = tensor([1, 22528, 1, 256])]; tensor var_8763_end_mask_0 = const()[name = string("op_8763_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8763_cast_fp16 = slice_by_index(begin = var_8763_begin_0, end = var_8763_end_0, end_mask = var_8763_end_mask_0, x = key_cache)[name = string("op_8763_cast_fp16")]; tensor var_8783_begin_0 = const()[name = string("op_8783_begin_0"), val = tensor([0, 21504, 0, 0])]; tensor var_8783_end_0 = const()[name = string("op_8783_end_0"), val = tensor([1, 22528, 1, 256])]; tensor var_8783_end_mask_0 = const()[name = string("op_8783_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8783_cast_fp16 = slice_by_index(begin = var_8783_begin_0, end = var_8783_end_0, end_mask = var_8783_end_mask_0, x = value_cache)[name = string("op_8783_cast_fp16")]; tensor var_8795_axes_0 = const()[name = string("op_8795_axes_0"), val = tensor([-1])]; tensor var_8795_cast_fp16 = squeeze(axes = var_8795_axes_0, x = hidden_83_cast_fp16)[name = string("op_8795_cast_fp16")]; tensor var_8797_axes_0 = const()[name = string("op_8797_axes_0"), val = tensor([-1])]; tensor var_8797_cast_fp16 = squeeze(axes = var_8797_axes_0, x = var_8795_cast_fp16)[name = string("op_8797_cast_fp16")]; tensor hidden_states_337_axes_0 = const()[name = string("hidden_states_337_axes_0"), val = tensor([0])]; tensor hidden_states_337_cast_fp16 = expand_dims(axes = hidden_states_337_axes_0, x = var_8797_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; fp16 var_8803_promoted_to_fp16 = const()[name = string("op_8803_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8809_cast_fp16 = pow(x = hidden_states_337_cast_fp16, y = var_8803_promoted_to_fp16)[name = string("op_8809_cast_fp16")]; tensor variance_169_axes_0 = const()[name = string("variance_169_axes_0"), val = tensor([-1])]; bool variance_169_keep_dims_0 = const()[name = string("variance_169_keep_dims_0"), val = bool(true)]; tensor variance_169_cast_fp16 = reduce_mean(axes = variance_169_axes_0, keep_dims = variance_169_keep_dims_0, x = var_8809_cast_fp16)[name = string("variance_169_cast_fp16")]; tensor const_211_to_fp16 = const()[name = string("const_211_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330510400)))]; tensor var_8813_cast_fp16 = mul(x = const_211_to_fp16, y = hidden_states_337_cast_fp16)[name = string("op_8813_cast_fp16")]; fp16 var_8814_to_fp16 = const()[name = string("op_8814_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8815_cast_fp16 = add(x = variance_169_cast_fp16, y = var_8814_to_fp16)[name = string("op_8815_cast_fp16")]; fp32 var_8816_epsilon_0 = const()[name = string("op_8816_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8816_cast_fp16 = rsqrt(epsilon = var_8816_epsilon_0, x = var_8815_cast_fp16)[name = string("op_8816_cast_fp16")]; tensor input_211_cast_fp16 = mul(x = var_8813_cast_fp16, y = var_8816_cast_fp16)[name = string("input_211_cast_fp16")]; tensor layers_21_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330512512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332609728))))[name = string("layers_21_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_21_self_attn_q_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor var_8825 = const()[name = string("op_8825"), val = tensor([1, 1, 16, 128])]; tensor var_8826_cast_fp16 = reshape(shape = var_8825, x = linear_147_cast_fp16)[name = string("op_8826_cast_fp16")]; tensor layers_21_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(332610304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333658944))))[name = string("layers_21_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_k_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor var_8837 = const()[name = string("op_8837"), val = tensor([1, 1, 8, 128])]; tensor var_8838_cast_fp16 = reshape(shape = var_8837, x = linear_148_cast_fp16)[name = string("op_8838_cast_fp16")]; tensor layers_21_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333659520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334708160))))[name = string("layers_21_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_v_proj_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("linear_149_cast_fp16")]; fp16 var_8857_promoted_to_fp16 = const()[name = string("op_8857_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8863_cast_fp16 = pow(x = var_8826_cast_fp16, y = var_8857_promoted_to_fp16)[name = string("op_8863_cast_fp16")]; bool variance_171_keep_dims_0 = const()[name = string("variance_171_keep_dims_0"), val = bool(true)]; tensor const_366 = const()[name = string("const_366"), val = tensor([3])]; tensor variance_171_cast_fp16 = reduce_mean(axes = const_366, keep_dims = variance_171_keep_dims_0, x = var_8863_cast_fp16)[name = string("variance_171_cast_fp16")]; tensor const_367_to_fp16 = const()[name = string("const_367_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334708736)))]; tensor var_8867_cast_fp16 = mul(x = const_367_to_fp16, y = var_8826_cast_fp16)[name = string("op_8867_cast_fp16")]; fp16 var_8868_to_fp16 = const()[name = string("op_8868_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8869_cast_fp16 = add(x = variance_171_cast_fp16, y = var_8868_to_fp16)[name = string("op_8869_cast_fp16")]; fp32 var_8870_epsilon_0 = const()[name = string("op_8870_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8870_cast_fp16 = rsqrt(epsilon = var_8870_epsilon_0, x = var_8869_cast_fp16)[name = string("op_8870_cast_fp16")]; tensor q_127_cast_fp16 = mul(x = var_8867_cast_fp16, y = var_8870_cast_fp16)[name = string("q_127_cast_fp16")]; fp16 var_8875_promoted_to_fp16 = const()[name = string("op_8875_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_8881_cast_fp16 = pow(x = var_8838_cast_fp16, y = var_8875_promoted_to_fp16)[name = string("op_8881_cast_fp16")]; bool variance_173_keep_dims_0 = const()[name = string("variance_173_keep_dims_0"), val = bool(true)]; tensor const_368 = const()[name = string("const_368"), val = tensor([3])]; tensor variance_173_cast_fp16 = reduce_mean(axes = const_368, keep_dims = variance_173_keep_dims_0, x = var_8881_cast_fp16)[name = string("variance_173_cast_fp16")]; tensor const_369_to_fp16 = const()[name = string("const_369_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334709056)))]; tensor var_8885_cast_fp16 = mul(x = const_369_to_fp16, y = var_8838_cast_fp16)[name = string("op_8885_cast_fp16")]; fp16 var_8886_to_fp16 = const()[name = string("op_8886_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_8887_cast_fp16 = add(x = variance_173_cast_fp16, y = var_8886_to_fp16)[name = string("op_8887_cast_fp16")]; fp32 var_8888_epsilon_0 = const()[name = string("op_8888_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_8888_cast_fp16 = rsqrt(epsilon = var_8888_epsilon_0, x = var_8887_cast_fp16)[name = string("op_8888_cast_fp16")]; tensor k_127_cast_fp16 = mul(x = var_8885_cast_fp16, y = var_8888_cast_fp16)[name = string("k_127_cast_fp16")]; tensor var_8903_cast_fp16 = mul(x = q_127_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8903_cast_fp16")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_127_cast_fp16)[name = string("x1_85_cast_fp16")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_127_cast_fp16)[name = string("x2_85_cast_fp16")]; fp16 const_216_promoted_to_fp16 = const()[name = string("const_216_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8924_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_216_promoted_to_fp16)[name = string("op_8924_cast_fp16")]; int32 var_8926 = const()[name = string("op_8926"), val = int32(-1)]; bool var_8927_interleave_0 = const()[name = string("op_8927_interleave_0"), val = bool(false)]; tensor var_8927_cast_fp16 = concat(axis = var_8926, interleave = var_8927_interleave_0, values = (var_8924_cast_fp16, x1_85_cast_fp16))[name = string("op_8927_cast_fp16")]; tensor var_8928_cast_fp16 = mul(x = var_8927_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8928_cast_fp16")]; tensor q_131_cast_fp16 = add(x = var_8903_cast_fp16, y = var_8928_cast_fp16)[name = string("q_131_cast_fp16")]; tensor var_8931_cast_fp16 = mul(x = k_127_cast_fp16, y = cos_1_cast_fp16)[name = string("op_8931_cast_fp16")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_127_cast_fp16)[name = string("x1_87_cast_fp16")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_127_cast_fp16)[name = string("x2_87_cast_fp16")]; fp16 const_219_promoted_to_fp16 = const()[name = string("const_219_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_8952_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_219_promoted_to_fp16)[name = string("op_8952_cast_fp16")]; int32 var_8954 = const()[name = string("op_8954"), val = int32(-1)]; bool var_8955_interleave_0 = const()[name = string("op_8955_interleave_0"), val = bool(false)]; tensor var_8955_cast_fp16 = concat(axis = var_8954, interleave = var_8955_interleave_0, values = (var_8952_cast_fp16, x1_87_cast_fp16))[name = string("op_8955_cast_fp16")]; tensor var_8956_cast_fp16 = mul(x = var_8955_cast_fp16, y = sin_1_cast_fp16)[name = string("op_8956_cast_fp16")]; tensor k_131_cast_fp16 = add(x = var_8931_cast_fp16, y = var_8956_cast_fp16)[name = string("k_131_cast_fp16")]; tensor var_8963 = const()[name = string("op_8963"), val = tensor([1, 1024, 1, 1])]; tensor nk_43_cast_fp16 = reshape(shape = var_8963, x = k_131_cast_fp16)[name = string("nk_43_cast_fp16")]; tensor var_8969 = const()[name = string("op_8969"), val = tensor([1, 1024, 1, 1])]; tensor nv_43_cast_fp16 = reshape(shape = var_8969, x = linear_149_cast_fp16)[name = string("nv_43_cast_fp16")]; tensor var_8974_cast_fp16 = mul(x = var_8763_cast_fp16, y = var_1203_cast_fp16)[name = string("op_8974_cast_fp16")]; tensor var_8975_cast_fp16 = mul(x = nk_43_cast_fp16, y = update_mask_cast_fp16)[name = string("op_8975_cast_fp16")]; tensor lkc_87_cast_fp16 = add(x = var_8974_cast_fp16, y = var_8975_cast_fp16)[name = string("lkc_87_cast_fp16")]; tensor var_8981_cast_fp16 = mul(x = var_8783_cast_fp16, y = var_1203_cast_fp16)[name = string("op_8981_cast_fp16")]; tensor var_8982_cast_fp16 = mul(x = nv_43_cast_fp16, y = update_mask_cast_fp16)[name = string("op_8982_cast_fp16")]; tensor lvc_87_cast_fp16 = add(x = var_8981_cast_fp16, y = var_8982_cast_fp16)[name = string("lvc_87_cast_fp16")]; tensor var_8986_axes_0 = const()[name = string("op_8986_axes_0"), val = tensor([2])]; tensor var_8986_cast_fp16 = squeeze(axes = var_8986_axes_0, x = lkc_87_cast_fp16)[name = string("op_8986_cast_fp16")]; tensor var_8991 = const()[name = string("op_8991"), val = tensor([1, 8, 128, 256])]; tensor kc_85_cast_fp16 = reshape(shape = var_8991, x = var_8986_cast_fp16)[name = string("kc_85_cast_fp16")]; tensor var_8994_axes_0 = const()[name = string("op_8994_axes_0"), val = tensor([2])]; tensor var_8994_cast_fp16 = squeeze(axes = var_8994_axes_0, x = lvc_87_cast_fp16)[name = string("op_8994_cast_fp16")]; tensor var_8999 = const()[name = string("op_8999"), val = tensor([1, 8, 128, 256])]; tensor vc_85_cast_fp16 = reshape(shape = var_8999, x = var_8994_cast_fp16)[name = string("vc_85_cast_fp16")]; tensor var_9002_axes_0 = const()[name = string("op_9002_axes_0"), val = tensor([2])]; tensor var_9002_cast_fp16 = expand_dims(axes = var_9002_axes_0, x = kc_85_cast_fp16)[name = string("op_9002_cast_fp16")]; tensor var_9010_reps_0 = const()[name = string("op_9010_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9010_cast_fp16 = tile(reps = var_9010_reps_0, x = var_9002_cast_fp16)[name = string("op_9010_cast_fp16")]; tensor var_9015 = const()[name = string("op_9015"), val = tensor([1, 16, 128, 256])]; tensor kc_87_cast_fp16 = reshape(shape = var_9015, x = var_9010_cast_fp16)[name = string("kc_87_cast_fp16")]; tensor var_9018_axes_0 = const()[name = string("op_9018_axes_0"), val = tensor([2])]; tensor var_9018_cast_fp16 = expand_dims(axes = var_9018_axes_0, x = vc_85_cast_fp16)[name = string("op_9018_cast_fp16")]; tensor var_9026_reps_0 = const()[name = string("op_9026_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9026_cast_fp16 = tile(reps = var_9026_reps_0, x = var_9018_cast_fp16)[name = string("op_9026_cast_fp16")]; tensor var_9031 = const()[name = string("op_9031"), val = tensor([1, 16, 128, 256])]; tensor vc_87_cast_fp16 = reshape(shape = var_9031, x = var_9026_cast_fp16)[name = string("vc_87_cast_fp16")]; tensor var_9035_perm_0 = const()[name = string("op_9035_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_9036_transpose_x_0 = const()[name = string("op_9036_transpose_x_0"), val = bool(false)]; bool var_9036_transpose_y_0 = const()[name = string("op_9036_transpose_y_0"), val = bool(false)]; tensor var_9035_cast_fp16 = transpose(perm = var_9035_perm_0, x = q_131_cast_fp16)[name = string("transpose_13")]; tensor var_9036_cast_fp16 = matmul(transpose_x = var_9036_transpose_x_0, transpose_y = var_9036_transpose_y_0, x = var_9035_cast_fp16, y = kc_87_cast_fp16)[name = string("op_9036_cast_fp16")]; fp16 _inversed_aw_169_y_0_to_fp16 = const()[name = string("_inversed_aw_169_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_169_cast_fp16 = mul(x = var_9036_cast_fp16, y = _inversed_aw_169_y_0_to_fp16)[name = string("_inversed_aw_169_cast_fp16")]; tensor aw_171_cast_fp16 = add(x = _inversed_aw_169_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_171_cast_fp16")]; int32 var_9050 = const()[name = string("op_9050"), val = int32(-1)]; tensor aw_175_cast_fp16 = softmax(axis = var_9050, x = aw_171_cast_fp16)[name = string("aw_175_cast_fp16")]; bool var_9056_transpose_x_1 = const()[name = string("op_9056_transpose_x_1"), val = bool(false)]; bool var_9056_transpose_y_1 = const()[name = string("op_9056_transpose_y_1"), val = bool(true)]; tensor var_9056_cast_fp16 = matmul(transpose_x = var_9056_transpose_x_1, transpose_y = var_9056_transpose_y_1, x = aw_175_cast_fp16, y = vc_87_cast_fp16)[name = string("op_9056_cast_fp16")]; tensor var_9059_perm_0 = const()[name = string("op_9059_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9063 = const()[name = string("op_9063"), val = tensor([1, 1, -1])]; tensor var_9059_cast_fp16 = transpose(perm = var_9059_perm_0, x = var_9056_cast_fp16)[name = string("transpose_12")]; tensor input_213_cast_fp16 = reshape(shape = var_9063, x = var_9059_cast_fp16)[name = string("input_213_cast_fp16")]; tensor layers_21_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334709376))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336806592))))[name = string("layers_21_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_150_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_self_attn_o_proj_weight_to_fp16_palettized, x = input_213_cast_fp16)[name = string("linear_150_cast_fp16")]; tensor var_9069_axes_0 = const()[name = string("op_9069_axes_0"), val = tensor([0])]; tensor var_9069_cast_fp16 = squeeze(axes = var_9069_axes_0, x = linear_150_cast_fp16)[name = string("op_9069_cast_fp16")]; tensor var_9071_axes_0 = const()[name = string("op_9071_axes_0"), val = tensor([0])]; tensor var_9071_cast_fp16 = squeeze(axes = var_9071_axes_0, x = var_9069_cast_fp16)[name = string("op_9071_cast_fp16")]; tensor var_9073_axes_0 = const()[name = string("op_9073_axes_0"), val = tensor([-1])]; tensor var_9073_cast_fp16 = expand_dims(axes = var_9073_axes_0, x = var_9071_cast_fp16)[name = string("op_9073_cast_fp16")]; tensor ao_43_axes_0 = const()[name = string("ao_43_axes_0"), val = tensor([-1])]; tensor ao_43_cast_fp16 = expand_dims(axes = ao_43_axes_0, x = var_9073_cast_fp16)[name = string("ao_43_cast_fp16")]; tensor hidden_85_cast_fp16 = add(x = hidden_83_cast_fp16, y = ao_43_cast_fp16)[name = string("hidden_85_cast_fp16")]; tensor var_9079_axes_0 = const()[name = string("op_9079_axes_0"), val = tensor([-1])]; tensor var_9079_cast_fp16 = squeeze(axes = var_9079_axes_0, x = hidden_85_cast_fp16)[name = string("op_9079_cast_fp16")]; tensor var_9081_axes_0 = const()[name = string("op_9081_axes_0"), val = tensor([-1])]; tensor var_9081_cast_fp16 = squeeze(axes = var_9081_axes_0, x = var_9079_cast_fp16)[name = string("op_9081_cast_fp16")]; tensor hidden_states_349_axes_0 = const()[name = string("hidden_states_349_axes_0"), val = tensor([0])]; tensor hidden_states_349_cast_fp16 = expand_dims(axes = hidden_states_349_axes_0, x = var_9081_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; fp16 var_9087_promoted_to_fp16 = const()[name = string("op_9087_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9093_cast_fp16 = pow(x = hidden_states_349_cast_fp16, y = var_9087_promoted_to_fp16)[name = string("op_9093_cast_fp16")]; tensor variance_175_axes_0 = const()[name = string("variance_175_axes_0"), val = tensor([-1])]; bool variance_175_keep_dims_0 = const()[name = string("variance_175_keep_dims_0"), val = bool(true)]; tensor variance_175_cast_fp16 = reduce_mean(axes = variance_175_axes_0, keep_dims = variance_175_keep_dims_0, x = var_9093_cast_fp16)[name = string("variance_175_cast_fp16")]; tensor const_220_to_fp16 = const()[name = string("const_220_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336807168)))]; tensor var_9097_cast_fp16 = mul(x = const_220_to_fp16, y = hidden_states_349_cast_fp16)[name = string("op_9097_cast_fp16")]; fp16 var_9098_to_fp16 = const()[name = string("op_9098_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9099_cast_fp16 = add(x = variance_175_cast_fp16, y = var_9098_to_fp16)[name = string("op_9099_cast_fp16")]; fp32 var_9100_epsilon_0 = const()[name = string("op_9100_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9100_cast_fp16 = rsqrt(epsilon = var_9100_epsilon_0, x = var_9099_cast_fp16)[name = string("op_9100_cast_fp16")]; tensor input_215_cast_fp16 = mul(x = var_9097_cast_fp16, y = var_9100_cast_fp16)[name = string("input_215_cast_fp16")]; tensor layers_21_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336809280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339955072))))[name = string("layers_21_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_gate_proj_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor var_9108_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_9108_cast_fp16")]; tensor layers_21_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339955648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343101440))))[name = string("layers_21_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_21_mlp_up_proj_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor input_219_cast_fp16 = mul(x = var_9108_cast_fp16, y = linear_152_cast_fp16)[name = string("input_219_cast_fp16")]; tensor layers_21_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343102016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346247808))))[name = string("layers_21_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_21_mlp_down_proj_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor var_9115_axes_0 = const()[name = string("op_9115_axes_0"), val = tensor([0])]; tensor var_9115_cast_fp16 = squeeze(axes = var_9115_axes_0, x = linear_153_cast_fp16)[name = string("op_9115_cast_fp16")]; tensor var_9117_axes_0 = const()[name = string("op_9117_axes_0"), val = tensor([0])]; tensor var_9117_cast_fp16 = squeeze(axes = var_9117_axes_0, x = var_9115_cast_fp16)[name = string("op_9117_cast_fp16")]; tensor var_9119_axes_0 = const()[name = string("op_9119_axes_0"), val = tensor([-1])]; tensor var_9119_cast_fp16 = expand_dims(axes = var_9119_axes_0, x = var_9117_cast_fp16)[name = string("op_9119_cast_fp16")]; tensor h_43_axes_0 = const()[name = string("h_43_axes_0"), val = tensor([-1])]; tensor h_43_cast_fp16 = expand_dims(axes = h_43_axes_0, x = var_9119_cast_fp16)[name = string("h_43_cast_fp16")]; tensor hidden_87_cast_fp16 = add(x = hidden_85_cast_fp16, y = h_43_cast_fp16)[name = string("hidden_87_cast_fp16")]; tensor var_9133_begin_0 = const()[name = string("op_9133_begin_0"), val = tensor([0, 22528, 0, 0])]; tensor var_9133_end_0 = const()[name = string("op_9133_end_0"), val = tensor([1, 23552, 1, 256])]; tensor var_9133_end_mask_0 = const()[name = string("op_9133_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9133_cast_fp16 = slice_by_index(begin = var_9133_begin_0, end = var_9133_end_0, end_mask = var_9133_end_mask_0, x = key_cache)[name = string("op_9133_cast_fp16")]; tensor var_9153_begin_0 = const()[name = string("op_9153_begin_0"), val = tensor([0, 22528, 0, 0])]; tensor var_9153_end_0 = const()[name = string("op_9153_end_0"), val = tensor([1, 23552, 1, 256])]; tensor var_9153_end_mask_0 = const()[name = string("op_9153_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9153_cast_fp16 = slice_by_index(begin = var_9153_begin_0, end = var_9153_end_0, end_mask = var_9153_end_mask_0, x = value_cache)[name = string("op_9153_cast_fp16")]; tensor var_9165_axes_0 = const()[name = string("op_9165_axes_0"), val = tensor([-1])]; tensor var_9165_cast_fp16 = squeeze(axes = var_9165_axes_0, x = hidden_87_cast_fp16)[name = string("op_9165_cast_fp16")]; tensor var_9167_axes_0 = const()[name = string("op_9167_axes_0"), val = tensor([-1])]; tensor var_9167_cast_fp16 = squeeze(axes = var_9167_axes_0, x = var_9165_cast_fp16)[name = string("op_9167_cast_fp16")]; tensor hidden_states_353_axes_0 = const()[name = string("hidden_states_353_axes_0"), val = tensor([0])]; tensor hidden_states_353_cast_fp16 = expand_dims(axes = hidden_states_353_axes_0, x = var_9167_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; fp16 var_9173_promoted_to_fp16 = const()[name = string("op_9173_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9179_cast_fp16 = pow(x = hidden_states_353_cast_fp16, y = var_9173_promoted_to_fp16)[name = string("op_9179_cast_fp16")]; tensor variance_177_axes_0 = const()[name = string("variance_177_axes_0"), val = tensor([-1])]; bool variance_177_keep_dims_0 = const()[name = string("variance_177_keep_dims_0"), val = bool(true)]; tensor variance_177_cast_fp16 = reduce_mean(axes = variance_177_axes_0, keep_dims = variance_177_keep_dims_0, x = var_9179_cast_fp16)[name = string("variance_177_cast_fp16")]; tensor const_221_to_fp16 = const()[name = string("const_221_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346248384)))]; tensor var_9183_cast_fp16 = mul(x = const_221_to_fp16, y = hidden_states_353_cast_fp16)[name = string("op_9183_cast_fp16")]; fp16 var_9184_to_fp16 = const()[name = string("op_9184_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9185_cast_fp16 = add(x = variance_177_cast_fp16, y = var_9184_to_fp16)[name = string("op_9185_cast_fp16")]; fp32 var_9186_epsilon_0 = const()[name = string("op_9186_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9186_cast_fp16 = rsqrt(epsilon = var_9186_epsilon_0, x = var_9185_cast_fp16)[name = string("op_9186_cast_fp16")]; tensor input_221_cast_fp16 = mul(x = var_9183_cast_fp16, y = var_9186_cast_fp16)[name = string("input_221_cast_fp16")]; tensor layers_22_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346250496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348347712))))[name = string("layers_22_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_22_self_attn_q_proj_weight_to_fp16_palettized, x = input_221_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor var_9195 = const()[name = string("op_9195"), val = tensor([1, 1, 16, 128])]; tensor var_9196_cast_fp16 = reshape(shape = var_9195, x = linear_154_cast_fp16)[name = string("op_9196_cast_fp16")]; tensor layers_22_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348348288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349396928))))[name = string("layers_22_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_k_proj_weight_to_fp16_palettized, x = input_221_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor var_9207 = const()[name = string("op_9207"), val = tensor([1, 1, 8, 128])]; tensor var_9208_cast_fp16 = reshape(shape = var_9207, x = linear_155_cast_fp16)[name = string("op_9208_cast_fp16")]; tensor layers_22_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349397504))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350446144))))[name = string("layers_22_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_v_proj_weight_to_fp16_palettized, x = input_221_cast_fp16)[name = string("linear_156_cast_fp16")]; fp16 var_9227_promoted_to_fp16 = const()[name = string("op_9227_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9233_cast_fp16 = pow(x = var_9196_cast_fp16, y = var_9227_promoted_to_fp16)[name = string("op_9233_cast_fp16")]; bool variance_179_keep_dims_0 = const()[name = string("variance_179_keep_dims_0"), val = bool(true)]; tensor const_370 = const()[name = string("const_370"), val = tensor([3])]; tensor variance_179_cast_fp16 = reduce_mean(axes = const_370, keep_dims = variance_179_keep_dims_0, x = var_9233_cast_fp16)[name = string("variance_179_cast_fp16")]; tensor const_371_to_fp16 = const()[name = string("const_371_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350446720)))]; tensor var_9237_cast_fp16 = mul(x = const_371_to_fp16, y = var_9196_cast_fp16)[name = string("op_9237_cast_fp16")]; fp16 var_9238_to_fp16 = const()[name = string("op_9238_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9239_cast_fp16 = add(x = variance_179_cast_fp16, y = var_9238_to_fp16)[name = string("op_9239_cast_fp16")]; fp32 var_9240_epsilon_0 = const()[name = string("op_9240_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9240_cast_fp16 = rsqrt(epsilon = var_9240_epsilon_0, x = var_9239_cast_fp16)[name = string("op_9240_cast_fp16")]; tensor q_133_cast_fp16 = mul(x = var_9237_cast_fp16, y = var_9240_cast_fp16)[name = string("q_133_cast_fp16")]; fp16 var_9245_promoted_to_fp16 = const()[name = string("op_9245_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9251_cast_fp16 = pow(x = var_9208_cast_fp16, y = var_9245_promoted_to_fp16)[name = string("op_9251_cast_fp16")]; bool variance_181_keep_dims_0 = const()[name = string("variance_181_keep_dims_0"), val = bool(true)]; tensor const_372 = const()[name = string("const_372"), val = tensor([3])]; tensor variance_181_cast_fp16 = reduce_mean(axes = const_372, keep_dims = variance_181_keep_dims_0, x = var_9251_cast_fp16)[name = string("variance_181_cast_fp16")]; tensor const_373_to_fp16 = const()[name = string("const_373_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350447040)))]; tensor var_9255_cast_fp16 = mul(x = const_373_to_fp16, y = var_9208_cast_fp16)[name = string("op_9255_cast_fp16")]; fp16 var_9256_to_fp16 = const()[name = string("op_9256_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9257_cast_fp16 = add(x = variance_181_cast_fp16, y = var_9256_to_fp16)[name = string("op_9257_cast_fp16")]; fp32 var_9258_epsilon_0 = const()[name = string("op_9258_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9258_cast_fp16 = rsqrt(epsilon = var_9258_epsilon_0, x = var_9257_cast_fp16)[name = string("op_9258_cast_fp16")]; tensor k_133_cast_fp16 = mul(x = var_9255_cast_fp16, y = var_9258_cast_fp16)[name = string("k_133_cast_fp16")]; tensor var_9273_cast_fp16 = mul(x = q_133_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9273_cast_fp16")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_133_cast_fp16)[name = string("x1_89_cast_fp16")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_133_cast_fp16)[name = string("x2_89_cast_fp16")]; fp16 const_226_promoted_to_fp16 = const()[name = string("const_226_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9294_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_226_promoted_to_fp16)[name = string("op_9294_cast_fp16")]; int32 var_9296 = const()[name = string("op_9296"), val = int32(-1)]; bool var_9297_interleave_0 = const()[name = string("op_9297_interleave_0"), val = bool(false)]; tensor var_9297_cast_fp16 = concat(axis = var_9296, interleave = var_9297_interleave_0, values = (var_9294_cast_fp16, x1_89_cast_fp16))[name = string("op_9297_cast_fp16")]; tensor var_9298_cast_fp16 = mul(x = var_9297_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9298_cast_fp16")]; tensor q_137_cast_fp16 = add(x = var_9273_cast_fp16, y = var_9298_cast_fp16)[name = string("q_137_cast_fp16")]; tensor var_9301_cast_fp16 = mul(x = k_133_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9301_cast_fp16")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_133_cast_fp16)[name = string("x1_91_cast_fp16")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_133_cast_fp16)[name = string("x2_91_cast_fp16")]; fp16 const_229_promoted_to_fp16 = const()[name = string("const_229_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9322_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_229_promoted_to_fp16)[name = string("op_9322_cast_fp16")]; int32 var_9324 = const()[name = string("op_9324"), val = int32(-1)]; bool var_9325_interleave_0 = const()[name = string("op_9325_interleave_0"), val = bool(false)]; tensor var_9325_cast_fp16 = concat(axis = var_9324, interleave = var_9325_interleave_0, values = (var_9322_cast_fp16, x1_91_cast_fp16))[name = string("op_9325_cast_fp16")]; tensor var_9326_cast_fp16 = mul(x = var_9325_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9326_cast_fp16")]; tensor k_137_cast_fp16 = add(x = var_9301_cast_fp16, y = var_9326_cast_fp16)[name = string("k_137_cast_fp16")]; tensor var_9333 = const()[name = string("op_9333"), val = tensor([1, 1024, 1, 1])]; tensor nk_45_cast_fp16 = reshape(shape = var_9333, x = k_137_cast_fp16)[name = string("nk_45_cast_fp16")]; tensor var_9339 = const()[name = string("op_9339"), val = tensor([1, 1024, 1, 1])]; tensor nv_45_cast_fp16 = reshape(shape = var_9339, x = linear_156_cast_fp16)[name = string("nv_45_cast_fp16")]; tensor var_9344_cast_fp16 = mul(x = var_9133_cast_fp16, y = var_1203_cast_fp16)[name = string("op_9344_cast_fp16")]; tensor var_9345_cast_fp16 = mul(x = nk_45_cast_fp16, y = update_mask_cast_fp16)[name = string("op_9345_cast_fp16")]; tensor lkc_91_cast_fp16 = add(x = var_9344_cast_fp16, y = var_9345_cast_fp16)[name = string("lkc_91_cast_fp16")]; tensor var_9351_cast_fp16 = mul(x = var_9153_cast_fp16, y = var_1203_cast_fp16)[name = string("op_9351_cast_fp16")]; tensor var_9352_cast_fp16 = mul(x = nv_45_cast_fp16, y = update_mask_cast_fp16)[name = string("op_9352_cast_fp16")]; tensor lvc_91_cast_fp16 = add(x = var_9351_cast_fp16, y = var_9352_cast_fp16)[name = string("lvc_91_cast_fp16")]; tensor var_9356_axes_0 = const()[name = string("op_9356_axes_0"), val = tensor([2])]; tensor var_9356_cast_fp16 = squeeze(axes = var_9356_axes_0, x = lkc_91_cast_fp16)[name = string("op_9356_cast_fp16")]; tensor var_9361 = const()[name = string("op_9361"), val = tensor([1, 8, 128, 256])]; tensor kc_89_cast_fp16 = reshape(shape = var_9361, x = var_9356_cast_fp16)[name = string("kc_89_cast_fp16")]; tensor var_9364_axes_0 = const()[name = string("op_9364_axes_0"), val = tensor([2])]; tensor var_9364_cast_fp16 = squeeze(axes = var_9364_axes_0, x = lvc_91_cast_fp16)[name = string("op_9364_cast_fp16")]; tensor var_9369 = const()[name = string("op_9369"), val = tensor([1, 8, 128, 256])]; tensor vc_89_cast_fp16 = reshape(shape = var_9369, x = var_9364_cast_fp16)[name = string("vc_89_cast_fp16")]; tensor var_9372_axes_0 = const()[name = string("op_9372_axes_0"), val = tensor([2])]; tensor var_9372_cast_fp16 = expand_dims(axes = var_9372_axes_0, x = kc_89_cast_fp16)[name = string("op_9372_cast_fp16")]; tensor var_9380_reps_0 = const()[name = string("op_9380_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9380_cast_fp16 = tile(reps = var_9380_reps_0, x = var_9372_cast_fp16)[name = string("op_9380_cast_fp16")]; tensor var_9385 = const()[name = string("op_9385"), val = tensor([1, 16, 128, 256])]; tensor kc_91_cast_fp16 = reshape(shape = var_9385, x = var_9380_cast_fp16)[name = string("kc_91_cast_fp16")]; tensor var_9388_axes_0 = const()[name = string("op_9388_axes_0"), val = tensor([2])]; tensor var_9388_cast_fp16 = expand_dims(axes = var_9388_axes_0, x = vc_89_cast_fp16)[name = string("op_9388_cast_fp16")]; tensor var_9396_reps_0 = const()[name = string("op_9396_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9396_cast_fp16 = tile(reps = var_9396_reps_0, x = var_9388_cast_fp16)[name = string("op_9396_cast_fp16")]; tensor var_9401 = const()[name = string("op_9401"), val = tensor([1, 16, 128, 256])]; tensor vc_91_cast_fp16 = reshape(shape = var_9401, x = var_9396_cast_fp16)[name = string("vc_91_cast_fp16")]; tensor var_9405_perm_0 = const()[name = string("op_9405_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_9406_transpose_x_0 = const()[name = string("op_9406_transpose_x_0"), val = bool(false)]; bool var_9406_transpose_y_0 = const()[name = string("op_9406_transpose_y_0"), val = bool(false)]; tensor var_9405_cast_fp16 = transpose(perm = var_9405_perm_0, x = q_137_cast_fp16)[name = string("transpose_11")]; tensor var_9406_cast_fp16 = matmul(transpose_x = var_9406_transpose_x_0, transpose_y = var_9406_transpose_y_0, x = var_9405_cast_fp16, y = kc_91_cast_fp16)[name = string("op_9406_cast_fp16")]; fp16 _inversed_aw_177_y_0_to_fp16 = const()[name = string("_inversed_aw_177_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_177_cast_fp16 = mul(x = var_9406_cast_fp16, y = _inversed_aw_177_y_0_to_fp16)[name = string("_inversed_aw_177_cast_fp16")]; tensor aw_179_cast_fp16 = add(x = _inversed_aw_177_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_179_cast_fp16")]; int32 var_9420 = const()[name = string("op_9420"), val = int32(-1)]; tensor aw_183_cast_fp16 = softmax(axis = var_9420, x = aw_179_cast_fp16)[name = string("aw_183_cast_fp16")]; bool var_9426_transpose_x_1 = const()[name = string("op_9426_transpose_x_1"), val = bool(false)]; bool var_9426_transpose_y_1 = const()[name = string("op_9426_transpose_y_1"), val = bool(true)]; tensor var_9426_cast_fp16 = matmul(transpose_x = var_9426_transpose_x_1, transpose_y = var_9426_transpose_y_1, x = aw_183_cast_fp16, y = vc_91_cast_fp16)[name = string("op_9426_cast_fp16")]; tensor var_9429_perm_0 = const()[name = string("op_9429_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9433 = const()[name = string("op_9433"), val = tensor([1, 1, -1])]; tensor var_9429_cast_fp16 = transpose(perm = var_9429_perm_0, x = var_9426_cast_fp16)[name = string("transpose_10")]; tensor input_223_cast_fp16 = reshape(shape = var_9433, x = var_9429_cast_fp16)[name = string("input_223_cast_fp16")]; tensor layers_22_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350447360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352544576))))[name = string("layers_22_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_self_attn_o_proj_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor var_9439_axes_0 = const()[name = string("op_9439_axes_0"), val = tensor([0])]; tensor var_9439_cast_fp16 = squeeze(axes = var_9439_axes_0, x = linear_157_cast_fp16)[name = string("op_9439_cast_fp16")]; tensor var_9441_axes_0 = const()[name = string("op_9441_axes_0"), val = tensor([0])]; tensor var_9441_cast_fp16 = squeeze(axes = var_9441_axes_0, x = var_9439_cast_fp16)[name = string("op_9441_cast_fp16")]; tensor var_9443_axes_0 = const()[name = string("op_9443_axes_0"), val = tensor([-1])]; tensor var_9443_cast_fp16 = expand_dims(axes = var_9443_axes_0, x = var_9441_cast_fp16)[name = string("op_9443_cast_fp16")]; tensor ao_45_axes_0 = const()[name = string("ao_45_axes_0"), val = tensor([-1])]; tensor ao_45_cast_fp16 = expand_dims(axes = ao_45_axes_0, x = var_9443_cast_fp16)[name = string("ao_45_cast_fp16")]; tensor hidden_89_cast_fp16 = add(x = hidden_87_cast_fp16, y = ao_45_cast_fp16)[name = string("hidden_89_cast_fp16")]; tensor var_9449_axes_0 = const()[name = string("op_9449_axes_0"), val = tensor([-1])]; tensor var_9449_cast_fp16 = squeeze(axes = var_9449_axes_0, x = hidden_89_cast_fp16)[name = string("op_9449_cast_fp16")]; tensor var_9451_axes_0 = const()[name = string("op_9451_axes_0"), val = tensor([-1])]; tensor var_9451_cast_fp16 = squeeze(axes = var_9451_axes_0, x = var_9449_cast_fp16)[name = string("op_9451_cast_fp16")]; tensor hidden_states_365_axes_0 = const()[name = string("hidden_states_365_axes_0"), val = tensor([0])]; tensor hidden_states_365_cast_fp16 = expand_dims(axes = hidden_states_365_axes_0, x = var_9451_cast_fp16)[name = string("hidden_states_365_cast_fp16")]; fp16 var_9457_promoted_to_fp16 = const()[name = string("op_9457_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9463_cast_fp16 = pow(x = hidden_states_365_cast_fp16, y = var_9457_promoted_to_fp16)[name = string("op_9463_cast_fp16")]; tensor variance_183_axes_0 = const()[name = string("variance_183_axes_0"), val = tensor([-1])]; bool variance_183_keep_dims_0 = const()[name = string("variance_183_keep_dims_0"), val = bool(true)]; tensor variance_183_cast_fp16 = reduce_mean(axes = variance_183_axes_0, keep_dims = variance_183_keep_dims_0, x = var_9463_cast_fp16)[name = string("variance_183_cast_fp16")]; tensor const_230_to_fp16 = const()[name = string("const_230_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352545152)))]; tensor var_9467_cast_fp16 = mul(x = const_230_to_fp16, y = hidden_states_365_cast_fp16)[name = string("op_9467_cast_fp16")]; fp16 var_9468_to_fp16 = const()[name = string("op_9468_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9469_cast_fp16 = add(x = variance_183_cast_fp16, y = var_9468_to_fp16)[name = string("op_9469_cast_fp16")]; fp32 var_9470_epsilon_0 = const()[name = string("op_9470_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9470_cast_fp16 = rsqrt(epsilon = var_9470_epsilon_0, x = var_9469_cast_fp16)[name = string("op_9470_cast_fp16")]; tensor input_225_cast_fp16 = mul(x = var_9467_cast_fp16, y = var_9470_cast_fp16)[name = string("input_225_cast_fp16")]; tensor layers_22_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352547264))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355693056))))[name = string("layers_22_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_gate_proj_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = string("linear_158_cast_fp16")]; tensor var_9478_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_9478_cast_fp16")]; tensor layers_22_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355693632))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358839424))))[name = string("layers_22_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_22_mlp_up_proj_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor input_229_cast_fp16 = mul(x = var_9478_cast_fp16, y = linear_159_cast_fp16)[name = string("input_229_cast_fp16")]; tensor layers_22_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358840000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361985792))))[name = string("layers_22_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_160_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_22_mlp_down_proj_weight_to_fp16_palettized, x = input_229_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor var_9485_axes_0 = const()[name = string("op_9485_axes_0"), val = tensor([0])]; tensor var_9485_cast_fp16 = squeeze(axes = var_9485_axes_0, x = linear_160_cast_fp16)[name = string("op_9485_cast_fp16")]; tensor var_9487_axes_0 = const()[name = string("op_9487_axes_0"), val = tensor([0])]; tensor var_9487_cast_fp16 = squeeze(axes = var_9487_axes_0, x = var_9485_cast_fp16)[name = string("op_9487_cast_fp16")]; tensor var_9489_axes_0 = const()[name = string("op_9489_axes_0"), val = tensor([-1])]; tensor var_9489_cast_fp16 = expand_dims(axes = var_9489_axes_0, x = var_9487_cast_fp16)[name = string("op_9489_cast_fp16")]; tensor h_45_axes_0 = const()[name = string("h_45_axes_0"), val = tensor([-1])]; tensor h_45_cast_fp16 = expand_dims(axes = h_45_axes_0, x = var_9489_cast_fp16)[name = string("h_45_cast_fp16")]; tensor hidden_91_cast_fp16 = add(x = hidden_89_cast_fp16, y = h_45_cast_fp16)[name = string("hidden_91_cast_fp16")]; tensor var_9503_begin_0 = const()[name = string("op_9503_begin_0"), val = tensor([0, 23552, 0, 0])]; tensor var_9503_end_0 = const()[name = string("op_9503_end_0"), val = tensor([1, 24576, 1, 256])]; tensor var_9503_end_mask_0 = const()[name = string("op_9503_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9503_cast_fp16 = slice_by_index(begin = var_9503_begin_0, end = var_9503_end_0, end_mask = var_9503_end_mask_0, x = key_cache)[name = string("op_9503_cast_fp16")]; tensor var_9523_begin_0 = const()[name = string("op_9523_begin_0"), val = tensor([0, 23552, 0, 0])]; tensor var_9523_end_0 = const()[name = string("op_9523_end_0"), val = tensor([1, 24576, 1, 256])]; tensor var_9523_end_mask_0 = const()[name = string("op_9523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9523_cast_fp16 = slice_by_index(begin = var_9523_begin_0, end = var_9523_end_0, end_mask = var_9523_end_mask_0, x = value_cache)[name = string("op_9523_cast_fp16")]; tensor var_9535_axes_0 = const()[name = string("op_9535_axes_0"), val = tensor([-1])]; tensor var_9535_cast_fp16 = squeeze(axes = var_9535_axes_0, x = hidden_91_cast_fp16)[name = string("op_9535_cast_fp16")]; tensor var_9537_axes_0 = const()[name = string("op_9537_axes_0"), val = tensor([-1])]; tensor var_9537_cast_fp16 = squeeze(axes = var_9537_axes_0, x = var_9535_cast_fp16)[name = string("op_9537_cast_fp16")]; tensor hidden_states_369_axes_0 = const()[name = string("hidden_states_369_axes_0"), val = tensor([0])]; tensor hidden_states_369_cast_fp16 = expand_dims(axes = hidden_states_369_axes_0, x = var_9537_cast_fp16)[name = string("hidden_states_369_cast_fp16")]; fp16 var_9543_promoted_to_fp16 = const()[name = string("op_9543_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9549_cast_fp16 = pow(x = hidden_states_369_cast_fp16, y = var_9543_promoted_to_fp16)[name = string("op_9549_cast_fp16")]; tensor variance_185_axes_0 = const()[name = string("variance_185_axes_0"), val = tensor([-1])]; bool variance_185_keep_dims_0 = const()[name = string("variance_185_keep_dims_0"), val = bool(true)]; tensor variance_185_cast_fp16 = reduce_mean(axes = variance_185_axes_0, keep_dims = variance_185_keep_dims_0, x = var_9549_cast_fp16)[name = string("variance_185_cast_fp16")]; tensor const_231_to_fp16 = const()[name = string("const_231_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361986368)))]; tensor var_9553_cast_fp16 = mul(x = const_231_to_fp16, y = hidden_states_369_cast_fp16)[name = string("op_9553_cast_fp16")]; fp16 var_9554_to_fp16 = const()[name = string("op_9554_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9555_cast_fp16 = add(x = variance_185_cast_fp16, y = var_9554_to_fp16)[name = string("op_9555_cast_fp16")]; fp32 var_9556_epsilon_0 = const()[name = string("op_9556_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9556_cast_fp16 = rsqrt(epsilon = var_9556_epsilon_0, x = var_9555_cast_fp16)[name = string("op_9556_cast_fp16")]; tensor input_231_cast_fp16 = mul(x = var_9553_cast_fp16, y = var_9556_cast_fp16)[name = string("input_231_cast_fp16")]; tensor layers_23_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361988480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364085696))))[name = string("layers_23_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_23_self_attn_q_proj_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor var_9565 = const()[name = string("op_9565"), val = tensor([1, 1, 16, 128])]; tensor var_9566_cast_fp16 = reshape(shape = var_9565, x = linear_161_cast_fp16)[name = string("op_9566_cast_fp16")]; tensor layers_23_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(364086272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365134912))))[name = string("layers_23_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_k_proj_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor var_9577 = const()[name = string("op_9577"), val = tensor([1, 1, 8, 128])]; tensor var_9578_cast_fp16 = reshape(shape = var_9577, x = linear_162_cast_fp16)[name = string("op_9578_cast_fp16")]; tensor layers_23_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365135488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366184128))))[name = string("layers_23_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_v_proj_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("linear_163_cast_fp16")]; fp16 var_9597_promoted_to_fp16 = const()[name = string("op_9597_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9603_cast_fp16 = pow(x = var_9566_cast_fp16, y = var_9597_promoted_to_fp16)[name = string("op_9603_cast_fp16")]; bool variance_187_keep_dims_0 = const()[name = string("variance_187_keep_dims_0"), val = bool(true)]; tensor const_374 = const()[name = string("const_374"), val = tensor([3])]; tensor variance_187_cast_fp16 = reduce_mean(axes = const_374, keep_dims = variance_187_keep_dims_0, x = var_9603_cast_fp16)[name = string("variance_187_cast_fp16")]; tensor const_375_to_fp16 = const()[name = string("const_375_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366184704)))]; tensor var_9607_cast_fp16 = mul(x = const_375_to_fp16, y = var_9566_cast_fp16)[name = string("op_9607_cast_fp16")]; fp16 var_9608_to_fp16 = const()[name = string("op_9608_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9609_cast_fp16 = add(x = variance_187_cast_fp16, y = var_9608_to_fp16)[name = string("op_9609_cast_fp16")]; fp32 var_9610_epsilon_0 = const()[name = string("op_9610_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9610_cast_fp16 = rsqrt(epsilon = var_9610_epsilon_0, x = var_9609_cast_fp16)[name = string("op_9610_cast_fp16")]; tensor q_139_cast_fp16 = mul(x = var_9607_cast_fp16, y = var_9610_cast_fp16)[name = string("q_139_cast_fp16")]; fp16 var_9615_promoted_to_fp16 = const()[name = string("op_9615_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9621_cast_fp16 = pow(x = var_9578_cast_fp16, y = var_9615_promoted_to_fp16)[name = string("op_9621_cast_fp16")]; bool variance_189_keep_dims_0 = const()[name = string("variance_189_keep_dims_0"), val = bool(true)]; tensor const_376 = const()[name = string("const_376"), val = tensor([3])]; tensor variance_189_cast_fp16 = reduce_mean(axes = const_376, keep_dims = variance_189_keep_dims_0, x = var_9621_cast_fp16)[name = string("variance_189_cast_fp16")]; tensor const_377_to_fp16 = const()[name = string("const_377_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366185024)))]; tensor var_9625_cast_fp16 = mul(x = const_377_to_fp16, y = var_9578_cast_fp16)[name = string("op_9625_cast_fp16")]; fp16 var_9626_to_fp16 = const()[name = string("op_9626_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9627_cast_fp16 = add(x = variance_189_cast_fp16, y = var_9626_to_fp16)[name = string("op_9627_cast_fp16")]; fp32 var_9628_epsilon_0 = const()[name = string("op_9628_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9628_cast_fp16 = rsqrt(epsilon = var_9628_epsilon_0, x = var_9627_cast_fp16)[name = string("op_9628_cast_fp16")]; tensor k_139_cast_fp16 = mul(x = var_9625_cast_fp16, y = var_9628_cast_fp16)[name = string("k_139_cast_fp16")]; tensor var_9643_cast_fp16 = mul(x = q_139_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9643_cast_fp16")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_139_cast_fp16)[name = string("x1_93_cast_fp16")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_139_cast_fp16)[name = string("x2_93_cast_fp16")]; fp16 const_236_promoted_to_fp16 = const()[name = string("const_236_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9664_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_236_promoted_to_fp16)[name = string("op_9664_cast_fp16")]; int32 var_9666 = const()[name = string("op_9666"), val = int32(-1)]; bool var_9667_interleave_0 = const()[name = string("op_9667_interleave_0"), val = bool(false)]; tensor var_9667_cast_fp16 = concat(axis = var_9666, interleave = var_9667_interleave_0, values = (var_9664_cast_fp16, x1_93_cast_fp16))[name = string("op_9667_cast_fp16")]; tensor var_9668_cast_fp16 = mul(x = var_9667_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9668_cast_fp16")]; tensor q_143_cast_fp16 = add(x = var_9643_cast_fp16, y = var_9668_cast_fp16)[name = string("q_143_cast_fp16")]; tensor var_9671_cast_fp16 = mul(x = k_139_cast_fp16, y = cos_1_cast_fp16)[name = string("op_9671_cast_fp16")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_139_cast_fp16)[name = string("x1_95_cast_fp16")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_139_cast_fp16)[name = string("x2_95_cast_fp16")]; fp16 const_239_promoted_to_fp16 = const()[name = string("const_239_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_9692_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_239_promoted_to_fp16)[name = string("op_9692_cast_fp16")]; int32 var_9694 = const()[name = string("op_9694"), val = int32(-1)]; bool var_9695_interleave_0 = const()[name = string("op_9695_interleave_0"), val = bool(false)]; tensor var_9695_cast_fp16 = concat(axis = var_9694, interleave = var_9695_interleave_0, values = (var_9692_cast_fp16, x1_95_cast_fp16))[name = string("op_9695_cast_fp16")]; tensor var_9696_cast_fp16 = mul(x = var_9695_cast_fp16, y = sin_1_cast_fp16)[name = string("op_9696_cast_fp16")]; tensor k_143_cast_fp16 = add(x = var_9671_cast_fp16, y = var_9696_cast_fp16)[name = string("k_143_cast_fp16")]; tensor var_9703 = const()[name = string("op_9703"), val = tensor([1, 1024, 1, 1])]; tensor nk_47_cast_fp16 = reshape(shape = var_9703, x = k_143_cast_fp16)[name = string("nk_47_cast_fp16")]; tensor var_9709 = const()[name = string("op_9709"), val = tensor([1, 1024, 1, 1])]; tensor nv_47_cast_fp16 = reshape(shape = var_9709, x = linear_163_cast_fp16)[name = string("nv_47_cast_fp16")]; tensor var_9714_cast_fp16 = mul(x = var_9503_cast_fp16, y = var_1203_cast_fp16)[name = string("op_9714_cast_fp16")]; tensor var_9715_cast_fp16 = mul(x = nk_47_cast_fp16, y = update_mask_cast_fp16)[name = string("op_9715_cast_fp16")]; tensor lkc_95_cast_fp16 = add(x = var_9714_cast_fp16, y = var_9715_cast_fp16)[name = string("lkc_95_cast_fp16")]; tensor var_9721_cast_fp16 = mul(x = var_9523_cast_fp16, y = var_1203_cast_fp16)[name = string("op_9721_cast_fp16")]; tensor var_9722_cast_fp16 = mul(x = nv_47_cast_fp16, y = update_mask_cast_fp16)[name = string("op_9722_cast_fp16")]; tensor lvc_95_cast_fp16 = add(x = var_9721_cast_fp16, y = var_9722_cast_fp16)[name = string("lvc_95_cast_fp16")]; tensor var_9726_axes_0 = const()[name = string("op_9726_axes_0"), val = tensor([2])]; tensor var_9726_cast_fp16 = squeeze(axes = var_9726_axes_0, x = lkc_95_cast_fp16)[name = string("op_9726_cast_fp16")]; tensor var_9731 = const()[name = string("op_9731"), val = tensor([1, 8, 128, 256])]; tensor kc_93_cast_fp16 = reshape(shape = var_9731, x = var_9726_cast_fp16)[name = string("kc_93_cast_fp16")]; tensor var_9734_axes_0 = const()[name = string("op_9734_axes_0"), val = tensor([2])]; tensor var_9734_cast_fp16 = squeeze(axes = var_9734_axes_0, x = lvc_95_cast_fp16)[name = string("op_9734_cast_fp16")]; tensor var_9739 = const()[name = string("op_9739"), val = tensor([1, 8, 128, 256])]; tensor vc_93_cast_fp16 = reshape(shape = var_9739, x = var_9734_cast_fp16)[name = string("vc_93_cast_fp16")]; tensor var_9742_axes_0 = const()[name = string("op_9742_axes_0"), val = tensor([2])]; tensor var_9742_cast_fp16 = expand_dims(axes = var_9742_axes_0, x = kc_93_cast_fp16)[name = string("op_9742_cast_fp16")]; tensor var_9750_reps_0 = const()[name = string("op_9750_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9750_cast_fp16 = tile(reps = var_9750_reps_0, x = var_9742_cast_fp16)[name = string("op_9750_cast_fp16")]; tensor var_9755 = const()[name = string("op_9755"), val = tensor([1, 16, 128, 256])]; tensor kc_95_cast_fp16 = reshape(shape = var_9755, x = var_9750_cast_fp16)[name = string("kc_95_cast_fp16")]; tensor var_9758_axes_0 = const()[name = string("op_9758_axes_0"), val = tensor([2])]; tensor var_9758_cast_fp16 = expand_dims(axes = var_9758_axes_0, x = vc_93_cast_fp16)[name = string("op_9758_cast_fp16")]; tensor var_9766_reps_0 = const()[name = string("op_9766_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_9766_cast_fp16 = tile(reps = var_9766_reps_0, x = var_9758_cast_fp16)[name = string("op_9766_cast_fp16")]; tensor var_9771 = const()[name = string("op_9771"), val = tensor([1, 16, 128, 256])]; tensor vc_95_cast_fp16 = reshape(shape = var_9771, x = var_9766_cast_fp16)[name = string("vc_95_cast_fp16")]; tensor var_9775_perm_0 = const()[name = string("op_9775_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_9776_transpose_x_0 = const()[name = string("op_9776_transpose_x_0"), val = bool(false)]; bool var_9776_transpose_y_0 = const()[name = string("op_9776_transpose_y_0"), val = bool(false)]; tensor var_9775_cast_fp16 = transpose(perm = var_9775_perm_0, x = q_143_cast_fp16)[name = string("transpose_9")]; tensor var_9776_cast_fp16 = matmul(transpose_x = var_9776_transpose_x_0, transpose_y = var_9776_transpose_y_0, x = var_9775_cast_fp16, y = kc_95_cast_fp16)[name = string("op_9776_cast_fp16")]; fp16 _inversed_aw_185_y_0_to_fp16 = const()[name = string("_inversed_aw_185_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_185_cast_fp16 = mul(x = var_9776_cast_fp16, y = _inversed_aw_185_y_0_to_fp16)[name = string("_inversed_aw_185_cast_fp16")]; tensor aw_187_cast_fp16 = add(x = _inversed_aw_185_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_187_cast_fp16")]; int32 var_9790 = const()[name = string("op_9790"), val = int32(-1)]; tensor aw_191_cast_fp16 = softmax(axis = var_9790, x = aw_187_cast_fp16)[name = string("aw_191_cast_fp16")]; bool var_9796_transpose_x_1 = const()[name = string("op_9796_transpose_x_1"), val = bool(false)]; bool var_9796_transpose_y_1 = const()[name = string("op_9796_transpose_y_1"), val = bool(true)]; tensor var_9796_cast_fp16 = matmul(transpose_x = var_9796_transpose_x_1, transpose_y = var_9796_transpose_y_1, x = aw_191_cast_fp16, y = vc_95_cast_fp16)[name = string("op_9796_cast_fp16")]; tensor var_9799_perm_0 = const()[name = string("op_9799_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_9803 = const()[name = string("op_9803"), val = tensor([1, 1, -1])]; tensor var_9799_cast_fp16 = transpose(perm = var_9799_perm_0, x = var_9796_cast_fp16)[name = string("transpose_8")]; tensor input_233_cast_fp16 = reshape(shape = var_9803, x = var_9799_cast_fp16)[name = string("input_233_cast_fp16")]; tensor layers_23_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366185344))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368282560))))[name = string("layers_23_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_164_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_self_attn_o_proj_weight_to_fp16_palettized, x = input_233_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor var_9809_axes_0 = const()[name = string("op_9809_axes_0"), val = tensor([0])]; tensor var_9809_cast_fp16 = squeeze(axes = var_9809_axes_0, x = linear_164_cast_fp16)[name = string("op_9809_cast_fp16")]; tensor var_9811_axes_0 = const()[name = string("op_9811_axes_0"), val = tensor([0])]; tensor var_9811_cast_fp16 = squeeze(axes = var_9811_axes_0, x = var_9809_cast_fp16)[name = string("op_9811_cast_fp16")]; tensor var_9813_axes_0 = const()[name = string("op_9813_axes_0"), val = tensor([-1])]; tensor var_9813_cast_fp16 = expand_dims(axes = var_9813_axes_0, x = var_9811_cast_fp16)[name = string("op_9813_cast_fp16")]; tensor ao_47_axes_0 = const()[name = string("ao_47_axes_0"), val = tensor([-1])]; tensor ao_47_cast_fp16 = expand_dims(axes = ao_47_axes_0, x = var_9813_cast_fp16)[name = string("ao_47_cast_fp16")]; tensor hidden_93_cast_fp16 = add(x = hidden_91_cast_fp16, y = ao_47_cast_fp16)[name = string("hidden_93_cast_fp16")]; tensor var_9819_axes_0 = const()[name = string("op_9819_axes_0"), val = tensor([-1])]; tensor var_9819_cast_fp16 = squeeze(axes = var_9819_axes_0, x = hidden_93_cast_fp16)[name = string("op_9819_cast_fp16")]; tensor var_9821_axes_0 = const()[name = string("op_9821_axes_0"), val = tensor([-1])]; tensor var_9821_cast_fp16 = squeeze(axes = var_9821_axes_0, x = var_9819_cast_fp16)[name = string("op_9821_cast_fp16")]; tensor hidden_states_381_axes_0 = const()[name = string("hidden_states_381_axes_0"), val = tensor([0])]; tensor hidden_states_381_cast_fp16 = expand_dims(axes = hidden_states_381_axes_0, x = var_9821_cast_fp16)[name = string("hidden_states_381_cast_fp16")]; fp16 var_9827_promoted_to_fp16 = const()[name = string("op_9827_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9833_cast_fp16 = pow(x = hidden_states_381_cast_fp16, y = var_9827_promoted_to_fp16)[name = string("op_9833_cast_fp16")]; tensor variance_191_axes_0 = const()[name = string("variance_191_axes_0"), val = tensor([-1])]; bool variance_191_keep_dims_0 = const()[name = string("variance_191_keep_dims_0"), val = bool(true)]; tensor variance_191_cast_fp16 = reduce_mean(axes = variance_191_axes_0, keep_dims = variance_191_keep_dims_0, x = var_9833_cast_fp16)[name = string("variance_191_cast_fp16")]; tensor const_240_to_fp16 = const()[name = string("const_240_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368283136)))]; tensor var_9837_cast_fp16 = mul(x = const_240_to_fp16, y = hidden_states_381_cast_fp16)[name = string("op_9837_cast_fp16")]; fp16 var_9838_to_fp16 = const()[name = string("op_9838_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9839_cast_fp16 = add(x = variance_191_cast_fp16, y = var_9838_to_fp16)[name = string("op_9839_cast_fp16")]; fp32 var_9840_epsilon_0 = const()[name = string("op_9840_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9840_cast_fp16 = rsqrt(epsilon = var_9840_epsilon_0, x = var_9839_cast_fp16)[name = string("op_9840_cast_fp16")]; tensor input_235_cast_fp16 = mul(x = var_9837_cast_fp16, y = var_9840_cast_fp16)[name = string("input_235_cast_fp16")]; tensor layers_23_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368285248))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371431040))))[name = string("layers_23_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_gate_proj_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor var_9848_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_9848_cast_fp16")]; tensor layers_23_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371431616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374577408))))[name = string("layers_23_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_23_mlp_up_proj_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = string("linear_166_cast_fp16")]; tensor input_239_cast_fp16 = mul(x = var_9848_cast_fp16, y = linear_166_cast_fp16)[name = string("input_239_cast_fp16")]; tensor layers_23_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374577984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377723776))))[name = string("layers_23_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_167_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_23_mlp_down_proj_weight_to_fp16_palettized, x = input_239_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor var_9855_axes_0 = const()[name = string("op_9855_axes_0"), val = tensor([0])]; tensor var_9855_cast_fp16 = squeeze(axes = var_9855_axes_0, x = linear_167_cast_fp16)[name = string("op_9855_cast_fp16")]; tensor var_9857_axes_0 = const()[name = string("op_9857_axes_0"), val = tensor([0])]; tensor var_9857_cast_fp16 = squeeze(axes = var_9857_axes_0, x = var_9855_cast_fp16)[name = string("op_9857_cast_fp16")]; tensor var_9859_axes_0 = const()[name = string("op_9859_axes_0"), val = tensor([-1])]; tensor var_9859_cast_fp16 = expand_dims(axes = var_9859_axes_0, x = var_9857_cast_fp16)[name = string("op_9859_cast_fp16")]; tensor h_47_axes_0 = const()[name = string("h_47_axes_0"), val = tensor([-1])]; tensor h_47_cast_fp16 = expand_dims(axes = h_47_axes_0, x = var_9859_cast_fp16)[name = string("h_47_cast_fp16")]; tensor hidden_95_cast_fp16 = add(x = hidden_93_cast_fp16, y = h_47_cast_fp16)[name = string("hidden_95_cast_fp16")]; tensor var_9873_begin_0 = const()[name = string("op_9873_begin_0"), val = tensor([0, 24576, 0, 0])]; tensor var_9873_end_0 = const()[name = string("op_9873_end_0"), val = tensor([1, 25600, 1, 256])]; tensor var_9873_end_mask_0 = const()[name = string("op_9873_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9873_cast_fp16 = slice_by_index(begin = var_9873_begin_0, end = var_9873_end_0, end_mask = var_9873_end_mask_0, x = key_cache)[name = string("op_9873_cast_fp16")]; tensor var_9893_begin_0 = const()[name = string("op_9893_begin_0"), val = tensor([0, 24576, 0, 0])]; tensor var_9893_end_0 = const()[name = string("op_9893_end_0"), val = tensor([1, 25600, 1, 256])]; tensor var_9893_end_mask_0 = const()[name = string("op_9893_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9893_cast_fp16 = slice_by_index(begin = var_9893_begin_0, end = var_9893_end_0, end_mask = var_9893_end_mask_0, x = value_cache)[name = string("op_9893_cast_fp16")]; tensor var_9905_axes_0 = const()[name = string("op_9905_axes_0"), val = tensor([-1])]; tensor var_9905_cast_fp16 = squeeze(axes = var_9905_axes_0, x = hidden_95_cast_fp16)[name = string("op_9905_cast_fp16")]; tensor var_9907_axes_0 = const()[name = string("op_9907_axes_0"), val = tensor([-1])]; tensor var_9907_cast_fp16 = squeeze(axes = var_9907_axes_0, x = var_9905_cast_fp16)[name = string("op_9907_cast_fp16")]; tensor hidden_states_385_axes_0 = const()[name = string("hidden_states_385_axes_0"), val = tensor([0])]; tensor hidden_states_385_cast_fp16 = expand_dims(axes = hidden_states_385_axes_0, x = var_9907_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; fp16 var_9913_promoted_to_fp16 = const()[name = string("op_9913_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9919_cast_fp16 = pow(x = hidden_states_385_cast_fp16, y = var_9913_promoted_to_fp16)[name = string("op_9919_cast_fp16")]; tensor variance_193_axes_0 = const()[name = string("variance_193_axes_0"), val = tensor([-1])]; bool variance_193_keep_dims_0 = const()[name = string("variance_193_keep_dims_0"), val = bool(true)]; tensor variance_193_cast_fp16 = reduce_mean(axes = variance_193_axes_0, keep_dims = variance_193_keep_dims_0, x = var_9919_cast_fp16)[name = string("variance_193_cast_fp16")]; tensor const_241_to_fp16 = const()[name = string("const_241_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377724352)))]; tensor var_9923_cast_fp16 = mul(x = const_241_to_fp16, y = hidden_states_385_cast_fp16)[name = string("op_9923_cast_fp16")]; fp16 var_9924_to_fp16 = const()[name = string("op_9924_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9925_cast_fp16 = add(x = variance_193_cast_fp16, y = var_9924_to_fp16)[name = string("op_9925_cast_fp16")]; fp32 var_9926_epsilon_0 = const()[name = string("op_9926_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9926_cast_fp16 = rsqrt(epsilon = var_9926_epsilon_0, x = var_9925_cast_fp16)[name = string("op_9926_cast_fp16")]; tensor input_241_cast_fp16 = mul(x = var_9923_cast_fp16, y = var_9926_cast_fp16)[name = string("input_241_cast_fp16")]; tensor layers_24_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377726464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379823680))))[name = string("layers_24_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_24_self_attn_q_proj_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor var_9935 = const()[name = string("op_9935"), val = tensor([1, 1, 16, 128])]; tensor var_9936_cast_fp16 = reshape(shape = var_9935, x = linear_168_cast_fp16)[name = string("op_9936_cast_fp16")]; tensor layers_24_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379824256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380872896))))[name = string("layers_24_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_k_proj_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor var_9947 = const()[name = string("op_9947"), val = tensor([1, 1, 8, 128])]; tensor var_9948_cast_fp16 = reshape(shape = var_9947, x = linear_169_cast_fp16)[name = string("op_9948_cast_fp16")]; tensor layers_24_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380873472))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381922112))))[name = string("layers_24_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_v_proj_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("linear_170_cast_fp16")]; fp16 var_9967_promoted_to_fp16 = const()[name = string("op_9967_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9973_cast_fp16 = pow(x = var_9936_cast_fp16, y = var_9967_promoted_to_fp16)[name = string("op_9973_cast_fp16")]; bool variance_195_keep_dims_0 = const()[name = string("variance_195_keep_dims_0"), val = bool(true)]; tensor const_378 = const()[name = string("const_378"), val = tensor([3])]; tensor variance_195_cast_fp16 = reduce_mean(axes = const_378, keep_dims = variance_195_keep_dims_0, x = var_9973_cast_fp16)[name = string("variance_195_cast_fp16")]; tensor const_379_to_fp16 = const()[name = string("const_379_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381922688)))]; tensor var_9977_cast_fp16 = mul(x = const_379_to_fp16, y = var_9936_cast_fp16)[name = string("op_9977_cast_fp16")]; fp16 var_9978_to_fp16 = const()[name = string("op_9978_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9979_cast_fp16 = add(x = variance_195_cast_fp16, y = var_9978_to_fp16)[name = string("op_9979_cast_fp16")]; fp32 var_9980_epsilon_0 = const()[name = string("op_9980_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9980_cast_fp16 = rsqrt(epsilon = var_9980_epsilon_0, x = var_9979_cast_fp16)[name = string("op_9980_cast_fp16")]; tensor q_145_cast_fp16 = mul(x = var_9977_cast_fp16, y = var_9980_cast_fp16)[name = string("q_145_cast_fp16")]; fp16 var_9985_promoted_to_fp16 = const()[name = string("op_9985_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_9991_cast_fp16 = pow(x = var_9948_cast_fp16, y = var_9985_promoted_to_fp16)[name = string("op_9991_cast_fp16")]; bool variance_197_keep_dims_0 = const()[name = string("variance_197_keep_dims_0"), val = bool(true)]; tensor const_380 = const()[name = string("const_380"), val = tensor([3])]; tensor variance_197_cast_fp16 = reduce_mean(axes = const_380, keep_dims = variance_197_keep_dims_0, x = var_9991_cast_fp16)[name = string("variance_197_cast_fp16")]; tensor const_381_to_fp16 = const()[name = string("const_381_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381923008)))]; tensor var_9995_cast_fp16 = mul(x = const_381_to_fp16, y = var_9948_cast_fp16)[name = string("op_9995_cast_fp16")]; fp16 var_9996_to_fp16 = const()[name = string("op_9996_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_9997_cast_fp16 = add(x = variance_197_cast_fp16, y = var_9996_to_fp16)[name = string("op_9997_cast_fp16")]; fp32 var_9998_epsilon_0 = const()[name = string("op_9998_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_9998_cast_fp16 = rsqrt(epsilon = var_9998_epsilon_0, x = var_9997_cast_fp16)[name = string("op_9998_cast_fp16")]; tensor k_145_cast_fp16 = mul(x = var_9995_cast_fp16, y = var_9998_cast_fp16)[name = string("k_145_cast_fp16")]; tensor var_10013_cast_fp16 = mul(x = q_145_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10013_cast_fp16")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_145_cast_fp16)[name = string("x1_97_cast_fp16")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_145_cast_fp16)[name = string("x2_97_cast_fp16")]; fp16 const_246_promoted_to_fp16 = const()[name = string("const_246_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10034_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_246_promoted_to_fp16)[name = string("op_10034_cast_fp16")]; int32 var_10036 = const()[name = string("op_10036"), val = int32(-1)]; bool var_10037_interleave_0 = const()[name = string("op_10037_interleave_0"), val = bool(false)]; tensor var_10037_cast_fp16 = concat(axis = var_10036, interleave = var_10037_interleave_0, values = (var_10034_cast_fp16, x1_97_cast_fp16))[name = string("op_10037_cast_fp16")]; tensor var_10038_cast_fp16 = mul(x = var_10037_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10038_cast_fp16")]; tensor q_149_cast_fp16 = add(x = var_10013_cast_fp16, y = var_10038_cast_fp16)[name = string("q_149_cast_fp16")]; tensor var_10041_cast_fp16 = mul(x = k_145_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10041_cast_fp16")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_145_cast_fp16)[name = string("x1_99_cast_fp16")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_145_cast_fp16)[name = string("x2_99_cast_fp16")]; fp16 const_249_promoted_to_fp16 = const()[name = string("const_249_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10062_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_249_promoted_to_fp16)[name = string("op_10062_cast_fp16")]; int32 var_10064 = const()[name = string("op_10064"), val = int32(-1)]; bool var_10065_interleave_0 = const()[name = string("op_10065_interleave_0"), val = bool(false)]; tensor var_10065_cast_fp16 = concat(axis = var_10064, interleave = var_10065_interleave_0, values = (var_10062_cast_fp16, x1_99_cast_fp16))[name = string("op_10065_cast_fp16")]; tensor var_10066_cast_fp16 = mul(x = var_10065_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10066_cast_fp16")]; tensor k_149_cast_fp16 = add(x = var_10041_cast_fp16, y = var_10066_cast_fp16)[name = string("k_149_cast_fp16")]; tensor var_10073 = const()[name = string("op_10073"), val = tensor([1, 1024, 1, 1])]; tensor nk_49_cast_fp16 = reshape(shape = var_10073, x = k_149_cast_fp16)[name = string("nk_49_cast_fp16")]; tensor var_10079 = const()[name = string("op_10079"), val = tensor([1, 1024, 1, 1])]; tensor nv_49_cast_fp16 = reshape(shape = var_10079, x = linear_170_cast_fp16)[name = string("nv_49_cast_fp16")]; tensor var_10084_cast_fp16 = mul(x = var_9873_cast_fp16, y = var_1203_cast_fp16)[name = string("op_10084_cast_fp16")]; tensor var_10085_cast_fp16 = mul(x = nk_49_cast_fp16, y = update_mask_cast_fp16)[name = string("op_10085_cast_fp16")]; tensor lkc_99_cast_fp16 = add(x = var_10084_cast_fp16, y = var_10085_cast_fp16)[name = string("lkc_99_cast_fp16")]; tensor var_10091_cast_fp16 = mul(x = var_9893_cast_fp16, y = var_1203_cast_fp16)[name = string("op_10091_cast_fp16")]; tensor var_10092_cast_fp16 = mul(x = nv_49_cast_fp16, y = update_mask_cast_fp16)[name = string("op_10092_cast_fp16")]; tensor lvc_99_cast_fp16 = add(x = var_10091_cast_fp16, y = var_10092_cast_fp16)[name = string("lvc_99_cast_fp16")]; tensor var_10096_axes_0 = const()[name = string("op_10096_axes_0"), val = tensor([2])]; tensor var_10096_cast_fp16 = squeeze(axes = var_10096_axes_0, x = lkc_99_cast_fp16)[name = string("op_10096_cast_fp16")]; tensor var_10101 = const()[name = string("op_10101"), val = tensor([1, 8, 128, 256])]; tensor kc_97_cast_fp16 = reshape(shape = var_10101, x = var_10096_cast_fp16)[name = string("kc_97_cast_fp16")]; tensor var_10104_axes_0 = const()[name = string("op_10104_axes_0"), val = tensor([2])]; tensor var_10104_cast_fp16 = squeeze(axes = var_10104_axes_0, x = lvc_99_cast_fp16)[name = string("op_10104_cast_fp16")]; tensor var_10109 = const()[name = string("op_10109"), val = tensor([1, 8, 128, 256])]; tensor vc_97_cast_fp16 = reshape(shape = var_10109, x = var_10104_cast_fp16)[name = string("vc_97_cast_fp16")]; tensor var_10112_axes_0 = const()[name = string("op_10112_axes_0"), val = tensor([2])]; tensor var_10112_cast_fp16 = expand_dims(axes = var_10112_axes_0, x = kc_97_cast_fp16)[name = string("op_10112_cast_fp16")]; tensor var_10120_reps_0 = const()[name = string("op_10120_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10120_cast_fp16 = tile(reps = var_10120_reps_0, x = var_10112_cast_fp16)[name = string("op_10120_cast_fp16")]; tensor var_10125 = const()[name = string("op_10125"), val = tensor([1, 16, 128, 256])]; tensor kc_99_cast_fp16 = reshape(shape = var_10125, x = var_10120_cast_fp16)[name = string("kc_99_cast_fp16")]; tensor var_10128_axes_0 = const()[name = string("op_10128_axes_0"), val = tensor([2])]; tensor var_10128_cast_fp16 = expand_dims(axes = var_10128_axes_0, x = vc_97_cast_fp16)[name = string("op_10128_cast_fp16")]; tensor var_10136_reps_0 = const()[name = string("op_10136_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10136_cast_fp16 = tile(reps = var_10136_reps_0, x = var_10128_cast_fp16)[name = string("op_10136_cast_fp16")]; tensor var_10141 = const()[name = string("op_10141"), val = tensor([1, 16, 128, 256])]; tensor vc_99_cast_fp16 = reshape(shape = var_10141, x = var_10136_cast_fp16)[name = string("vc_99_cast_fp16")]; tensor var_10145_perm_0 = const()[name = string("op_10145_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_10146_transpose_x_0 = const()[name = string("op_10146_transpose_x_0"), val = bool(false)]; bool var_10146_transpose_y_0 = const()[name = string("op_10146_transpose_y_0"), val = bool(false)]; tensor var_10145_cast_fp16 = transpose(perm = var_10145_perm_0, x = q_149_cast_fp16)[name = string("transpose_7")]; tensor var_10146_cast_fp16 = matmul(transpose_x = var_10146_transpose_x_0, transpose_y = var_10146_transpose_y_0, x = var_10145_cast_fp16, y = kc_99_cast_fp16)[name = string("op_10146_cast_fp16")]; fp16 _inversed_aw_193_y_0_to_fp16 = const()[name = string("_inversed_aw_193_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_193_cast_fp16 = mul(x = var_10146_cast_fp16, y = _inversed_aw_193_y_0_to_fp16)[name = string("_inversed_aw_193_cast_fp16")]; tensor aw_195_cast_fp16 = add(x = _inversed_aw_193_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_195_cast_fp16")]; int32 var_10160 = const()[name = string("op_10160"), val = int32(-1)]; tensor aw_199_cast_fp16 = softmax(axis = var_10160, x = aw_195_cast_fp16)[name = string("aw_199_cast_fp16")]; bool var_10166_transpose_x_1 = const()[name = string("op_10166_transpose_x_1"), val = bool(false)]; bool var_10166_transpose_y_1 = const()[name = string("op_10166_transpose_y_1"), val = bool(true)]; tensor var_10166_cast_fp16 = matmul(transpose_x = var_10166_transpose_x_1, transpose_y = var_10166_transpose_y_1, x = aw_199_cast_fp16, y = vc_99_cast_fp16)[name = string("op_10166_cast_fp16")]; tensor var_10169_perm_0 = const()[name = string("op_10169_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10173 = const()[name = string("op_10173"), val = tensor([1, 1, -1])]; tensor var_10169_cast_fp16 = transpose(perm = var_10169_perm_0, x = var_10166_cast_fp16)[name = string("transpose_6")]; tensor input_243_cast_fp16 = reshape(shape = var_10173, x = var_10169_cast_fp16)[name = string("input_243_cast_fp16")]; tensor layers_24_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(381923328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384020544))))[name = string("layers_24_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_171_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_self_attn_o_proj_weight_to_fp16_palettized, x = input_243_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor var_10179_axes_0 = const()[name = string("op_10179_axes_0"), val = tensor([0])]; tensor var_10179_cast_fp16 = squeeze(axes = var_10179_axes_0, x = linear_171_cast_fp16)[name = string("op_10179_cast_fp16")]; tensor var_10181_axes_0 = const()[name = string("op_10181_axes_0"), val = tensor([0])]; tensor var_10181_cast_fp16 = squeeze(axes = var_10181_axes_0, x = var_10179_cast_fp16)[name = string("op_10181_cast_fp16")]; tensor var_10183_axes_0 = const()[name = string("op_10183_axes_0"), val = tensor([-1])]; tensor var_10183_cast_fp16 = expand_dims(axes = var_10183_axes_0, x = var_10181_cast_fp16)[name = string("op_10183_cast_fp16")]; tensor ao_49_axes_0 = const()[name = string("ao_49_axes_0"), val = tensor([-1])]; tensor ao_49_cast_fp16 = expand_dims(axes = ao_49_axes_0, x = var_10183_cast_fp16)[name = string("ao_49_cast_fp16")]; tensor hidden_97_cast_fp16 = add(x = hidden_95_cast_fp16, y = ao_49_cast_fp16)[name = string("hidden_97_cast_fp16")]; tensor var_10189_axes_0 = const()[name = string("op_10189_axes_0"), val = tensor([-1])]; tensor var_10189_cast_fp16 = squeeze(axes = var_10189_axes_0, x = hidden_97_cast_fp16)[name = string("op_10189_cast_fp16")]; tensor var_10191_axes_0 = const()[name = string("op_10191_axes_0"), val = tensor([-1])]; tensor var_10191_cast_fp16 = squeeze(axes = var_10191_axes_0, x = var_10189_cast_fp16)[name = string("op_10191_cast_fp16")]; tensor hidden_states_397_axes_0 = const()[name = string("hidden_states_397_axes_0"), val = tensor([0])]; tensor hidden_states_397_cast_fp16 = expand_dims(axes = hidden_states_397_axes_0, x = var_10191_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; fp16 var_10197_promoted_to_fp16 = const()[name = string("op_10197_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10203_cast_fp16 = pow(x = hidden_states_397_cast_fp16, y = var_10197_promoted_to_fp16)[name = string("op_10203_cast_fp16")]; tensor variance_199_axes_0 = const()[name = string("variance_199_axes_0"), val = tensor([-1])]; bool variance_199_keep_dims_0 = const()[name = string("variance_199_keep_dims_0"), val = bool(true)]; tensor variance_199_cast_fp16 = reduce_mean(axes = variance_199_axes_0, keep_dims = variance_199_keep_dims_0, x = var_10203_cast_fp16)[name = string("variance_199_cast_fp16")]; tensor const_250_to_fp16 = const()[name = string("const_250_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384021120)))]; tensor var_10207_cast_fp16 = mul(x = const_250_to_fp16, y = hidden_states_397_cast_fp16)[name = string("op_10207_cast_fp16")]; fp16 var_10208_to_fp16 = const()[name = string("op_10208_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10209_cast_fp16 = add(x = variance_199_cast_fp16, y = var_10208_to_fp16)[name = string("op_10209_cast_fp16")]; fp32 var_10210_epsilon_0 = const()[name = string("op_10210_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10210_cast_fp16 = rsqrt(epsilon = var_10210_epsilon_0, x = var_10209_cast_fp16)[name = string("op_10210_cast_fp16")]; tensor input_245_cast_fp16 = mul(x = var_10207_cast_fp16, y = var_10210_cast_fp16)[name = string("input_245_cast_fp16")]; tensor layers_24_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384023232))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387169024))))[name = string("layers_24_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_gate_proj_weight_to_fp16_palettized, x = input_245_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor var_10218_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_10218_cast_fp16")]; tensor layers_24_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387169600))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390315392))))[name = string("layers_24_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_24_mlp_up_proj_weight_to_fp16_palettized, x = input_245_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor input_249_cast_fp16 = mul(x = var_10218_cast_fp16, y = linear_173_cast_fp16)[name = string("input_249_cast_fp16")]; tensor layers_24_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390315968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393461760))))[name = string("layers_24_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_174_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_24_mlp_down_proj_weight_to_fp16_palettized, x = input_249_cast_fp16)[name = string("linear_174_cast_fp16")]; tensor var_10225_axes_0 = const()[name = string("op_10225_axes_0"), val = tensor([0])]; tensor var_10225_cast_fp16 = squeeze(axes = var_10225_axes_0, x = linear_174_cast_fp16)[name = string("op_10225_cast_fp16")]; tensor var_10227_axes_0 = const()[name = string("op_10227_axes_0"), val = tensor([0])]; tensor var_10227_cast_fp16 = squeeze(axes = var_10227_axes_0, x = var_10225_cast_fp16)[name = string("op_10227_cast_fp16")]; tensor var_10229_axes_0 = const()[name = string("op_10229_axes_0"), val = tensor([-1])]; tensor var_10229_cast_fp16 = expand_dims(axes = var_10229_axes_0, x = var_10227_cast_fp16)[name = string("op_10229_cast_fp16")]; tensor h_49_axes_0 = const()[name = string("h_49_axes_0"), val = tensor([-1])]; tensor h_49_cast_fp16 = expand_dims(axes = h_49_axes_0, x = var_10229_cast_fp16)[name = string("h_49_cast_fp16")]; tensor hidden_99_cast_fp16 = add(x = hidden_97_cast_fp16, y = h_49_cast_fp16)[name = string("hidden_99_cast_fp16")]; tensor var_10243_begin_0 = const()[name = string("op_10243_begin_0"), val = tensor([0, 25600, 0, 0])]; tensor var_10243_end_0 = const()[name = string("op_10243_end_0"), val = tensor([1, 26624, 1, 256])]; tensor var_10243_end_mask_0 = const()[name = string("op_10243_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10243_cast_fp16 = slice_by_index(begin = var_10243_begin_0, end = var_10243_end_0, end_mask = var_10243_end_mask_0, x = key_cache)[name = string("op_10243_cast_fp16")]; tensor var_10263_begin_0 = const()[name = string("op_10263_begin_0"), val = tensor([0, 25600, 0, 0])]; tensor var_10263_end_0 = const()[name = string("op_10263_end_0"), val = tensor([1, 26624, 1, 256])]; tensor var_10263_end_mask_0 = const()[name = string("op_10263_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10263_cast_fp16 = slice_by_index(begin = var_10263_begin_0, end = var_10263_end_0, end_mask = var_10263_end_mask_0, x = value_cache)[name = string("op_10263_cast_fp16")]; tensor var_10275_axes_0 = const()[name = string("op_10275_axes_0"), val = tensor([-1])]; tensor var_10275_cast_fp16 = squeeze(axes = var_10275_axes_0, x = hidden_99_cast_fp16)[name = string("op_10275_cast_fp16")]; tensor var_10277_axes_0 = const()[name = string("op_10277_axes_0"), val = tensor([-1])]; tensor var_10277_cast_fp16 = squeeze(axes = var_10277_axes_0, x = var_10275_cast_fp16)[name = string("op_10277_cast_fp16")]; tensor hidden_states_401_axes_0 = const()[name = string("hidden_states_401_axes_0"), val = tensor([0])]; tensor hidden_states_401_cast_fp16 = expand_dims(axes = hidden_states_401_axes_0, x = var_10277_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; fp16 var_10283_promoted_to_fp16 = const()[name = string("op_10283_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10289_cast_fp16 = pow(x = hidden_states_401_cast_fp16, y = var_10283_promoted_to_fp16)[name = string("op_10289_cast_fp16")]; tensor variance_201_axes_0 = const()[name = string("variance_201_axes_0"), val = tensor([-1])]; bool variance_201_keep_dims_0 = const()[name = string("variance_201_keep_dims_0"), val = bool(true)]; tensor variance_201_cast_fp16 = reduce_mean(axes = variance_201_axes_0, keep_dims = variance_201_keep_dims_0, x = var_10289_cast_fp16)[name = string("variance_201_cast_fp16")]; tensor const_251_to_fp16 = const()[name = string("const_251_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393462336)))]; tensor var_10293_cast_fp16 = mul(x = const_251_to_fp16, y = hidden_states_401_cast_fp16)[name = string("op_10293_cast_fp16")]; fp16 var_10294_to_fp16 = const()[name = string("op_10294_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10295_cast_fp16 = add(x = variance_201_cast_fp16, y = var_10294_to_fp16)[name = string("op_10295_cast_fp16")]; fp32 var_10296_epsilon_0 = const()[name = string("op_10296_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10296_cast_fp16 = rsqrt(epsilon = var_10296_epsilon_0, x = var_10295_cast_fp16)[name = string("op_10296_cast_fp16")]; tensor input_251_cast_fp16 = mul(x = var_10293_cast_fp16, y = var_10296_cast_fp16)[name = string("input_251_cast_fp16")]; tensor layers_25_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393464448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395561664))))[name = string("layers_25_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_25_self_attn_q_proj_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor var_10305 = const()[name = string("op_10305"), val = tensor([1, 1, 16, 128])]; tensor var_10306_cast_fp16 = reshape(shape = var_10305, x = linear_175_cast_fp16)[name = string("op_10306_cast_fp16")]; tensor layers_25_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395562240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396610880))))[name = string("layers_25_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_k_proj_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor var_10317 = const()[name = string("op_10317"), val = tensor([1, 1, 8, 128])]; tensor var_10318_cast_fp16 = reshape(shape = var_10317, x = linear_176_cast_fp16)[name = string("op_10318_cast_fp16")]; tensor layers_25_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396611456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660096))))[name = string("layers_25_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_v_proj_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("linear_177_cast_fp16")]; fp16 var_10337_promoted_to_fp16 = const()[name = string("op_10337_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10343_cast_fp16 = pow(x = var_10306_cast_fp16, y = var_10337_promoted_to_fp16)[name = string("op_10343_cast_fp16")]; bool variance_203_keep_dims_0 = const()[name = string("variance_203_keep_dims_0"), val = bool(true)]; tensor const_382 = const()[name = string("const_382"), val = tensor([3])]; tensor variance_203_cast_fp16 = reduce_mean(axes = const_382, keep_dims = variance_203_keep_dims_0, x = var_10343_cast_fp16)[name = string("variance_203_cast_fp16")]; tensor const_383_to_fp16 = const()[name = string("const_383_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660672)))]; tensor var_10347_cast_fp16 = mul(x = const_383_to_fp16, y = var_10306_cast_fp16)[name = string("op_10347_cast_fp16")]; fp16 var_10348_to_fp16 = const()[name = string("op_10348_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10349_cast_fp16 = add(x = variance_203_cast_fp16, y = var_10348_to_fp16)[name = string("op_10349_cast_fp16")]; fp32 var_10350_epsilon_0 = const()[name = string("op_10350_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10350_cast_fp16 = rsqrt(epsilon = var_10350_epsilon_0, x = var_10349_cast_fp16)[name = string("op_10350_cast_fp16")]; tensor q_151_cast_fp16 = mul(x = var_10347_cast_fp16, y = var_10350_cast_fp16)[name = string("q_151_cast_fp16")]; fp16 var_10355_promoted_to_fp16 = const()[name = string("op_10355_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10361_cast_fp16 = pow(x = var_10318_cast_fp16, y = var_10355_promoted_to_fp16)[name = string("op_10361_cast_fp16")]; bool variance_205_keep_dims_0 = const()[name = string("variance_205_keep_dims_0"), val = bool(true)]; tensor const_384 = const()[name = string("const_384"), val = tensor([3])]; tensor variance_205_cast_fp16 = reduce_mean(axes = const_384, keep_dims = variance_205_keep_dims_0, x = var_10361_cast_fp16)[name = string("variance_205_cast_fp16")]; tensor const_385_to_fp16 = const()[name = string("const_385_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397660992)))]; tensor var_10365_cast_fp16 = mul(x = const_385_to_fp16, y = var_10318_cast_fp16)[name = string("op_10365_cast_fp16")]; fp16 var_10366_to_fp16 = const()[name = string("op_10366_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10367_cast_fp16 = add(x = variance_205_cast_fp16, y = var_10366_to_fp16)[name = string("op_10367_cast_fp16")]; fp32 var_10368_epsilon_0 = const()[name = string("op_10368_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10368_cast_fp16 = rsqrt(epsilon = var_10368_epsilon_0, x = var_10367_cast_fp16)[name = string("op_10368_cast_fp16")]; tensor k_151_cast_fp16 = mul(x = var_10365_cast_fp16, y = var_10368_cast_fp16)[name = string("k_151_cast_fp16")]; tensor var_10383_cast_fp16 = mul(x = q_151_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10383_cast_fp16")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_151_cast_fp16)[name = string("x1_101_cast_fp16")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_151_cast_fp16)[name = string("x2_101_cast_fp16")]; fp16 const_256_promoted_to_fp16 = const()[name = string("const_256_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10404_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_256_promoted_to_fp16)[name = string("op_10404_cast_fp16")]; int32 var_10406 = const()[name = string("op_10406"), val = int32(-1)]; bool var_10407_interleave_0 = const()[name = string("op_10407_interleave_0"), val = bool(false)]; tensor var_10407_cast_fp16 = concat(axis = var_10406, interleave = var_10407_interleave_0, values = (var_10404_cast_fp16, x1_101_cast_fp16))[name = string("op_10407_cast_fp16")]; tensor var_10408_cast_fp16 = mul(x = var_10407_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10408_cast_fp16")]; tensor q_155_cast_fp16 = add(x = var_10383_cast_fp16, y = var_10408_cast_fp16)[name = string("q_155_cast_fp16")]; tensor var_10411_cast_fp16 = mul(x = k_151_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10411_cast_fp16")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_151_cast_fp16)[name = string("x1_103_cast_fp16")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_151_cast_fp16)[name = string("x2_103_cast_fp16")]; fp16 const_259_promoted_to_fp16 = const()[name = string("const_259_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10432_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_259_promoted_to_fp16)[name = string("op_10432_cast_fp16")]; int32 var_10434 = const()[name = string("op_10434"), val = int32(-1)]; bool var_10435_interleave_0 = const()[name = string("op_10435_interleave_0"), val = bool(false)]; tensor var_10435_cast_fp16 = concat(axis = var_10434, interleave = var_10435_interleave_0, values = (var_10432_cast_fp16, x1_103_cast_fp16))[name = string("op_10435_cast_fp16")]; tensor var_10436_cast_fp16 = mul(x = var_10435_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10436_cast_fp16")]; tensor k_155_cast_fp16 = add(x = var_10411_cast_fp16, y = var_10436_cast_fp16)[name = string("k_155_cast_fp16")]; tensor var_10443 = const()[name = string("op_10443"), val = tensor([1, 1024, 1, 1])]; tensor nk_51_cast_fp16 = reshape(shape = var_10443, x = k_155_cast_fp16)[name = string("nk_51_cast_fp16")]; tensor var_10449 = const()[name = string("op_10449"), val = tensor([1, 1024, 1, 1])]; tensor nv_51_cast_fp16 = reshape(shape = var_10449, x = linear_177_cast_fp16)[name = string("nv_51_cast_fp16")]; tensor var_10454_cast_fp16 = mul(x = var_10243_cast_fp16, y = var_1203_cast_fp16)[name = string("op_10454_cast_fp16")]; tensor var_10455_cast_fp16 = mul(x = nk_51_cast_fp16, y = update_mask_cast_fp16)[name = string("op_10455_cast_fp16")]; tensor lkc_103_cast_fp16 = add(x = var_10454_cast_fp16, y = var_10455_cast_fp16)[name = string("lkc_103_cast_fp16")]; tensor var_10461_cast_fp16 = mul(x = var_10263_cast_fp16, y = var_1203_cast_fp16)[name = string("op_10461_cast_fp16")]; tensor var_10462_cast_fp16 = mul(x = nv_51_cast_fp16, y = update_mask_cast_fp16)[name = string("op_10462_cast_fp16")]; tensor lvc_103_cast_fp16 = add(x = var_10461_cast_fp16, y = var_10462_cast_fp16)[name = string("lvc_103_cast_fp16")]; tensor var_10466_axes_0 = const()[name = string("op_10466_axes_0"), val = tensor([2])]; tensor var_10466_cast_fp16 = squeeze(axes = var_10466_axes_0, x = lkc_103_cast_fp16)[name = string("op_10466_cast_fp16")]; tensor var_10471 = const()[name = string("op_10471"), val = tensor([1, 8, 128, 256])]; tensor kc_101_cast_fp16 = reshape(shape = var_10471, x = var_10466_cast_fp16)[name = string("kc_101_cast_fp16")]; tensor var_10474_axes_0 = const()[name = string("op_10474_axes_0"), val = tensor([2])]; tensor var_10474_cast_fp16 = squeeze(axes = var_10474_axes_0, x = lvc_103_cast_fp16)[name = string("op_10474_cast_fp16")]; tensor var_10479 = const()[name = string("op_10479"), val = tensor([1, 8, 128, 256])]; tensor vc_101_cast_fp16 = reshape(shape = var_10479, x = var_10474_cast_fp16)[name = string("vc_101_cast_fp16")]; tensor var_10482_axes_0 = const()[name = string("op_10482_axes_0"), val = tensor([2])]; tensor var_10482_cast_fp16 = expand_dims(axes = var_10482_axes_0, x = kc_101_cast_fp16)[name = string("op_10482_cast_fp16")]; tensor var_10490_reps_0 = const()[name = string("op_10490_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10490_cast_fp16 = tile(reps = var_10490_reps_0, x = var_10482_cast_fp16)[name = string("op_10490_cast_fp16")]; tensor var_10495 = const()[name = string("op_10495"), val = tensor([1, 16, 128, 256])]; tensor kc_103_cast_fp16 = reshape(shape = var_10495, x = var_10490_cast_fp16)[name = string("kc_103_cast_fp16")]; tensor var_10498_axes_0 = const()[name = string("op_10498_axes_0"), val = tensor([2])]; tensor var_10498_cast_fp16 = expand_dims(axes = var_10498_axes_0, x = vc_101_cast_fp16)[name = string("op_10498_cast_fp16")]; tensor var_10506_reps_0 = const()[name = string("op_10506_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10506_cast_fp16 = tile(reps = var_10506_reps_0, x = var_10498_cast_fp16)[name = string("op_10506_cast_fp16")]; tensor var_10511 = const()[name = string("op_10511"), val = tensor([1, 16, 128, 256])]; tensor vc_103_cast_fp16 = reshape(shape = var_10511, x = var_10506_cast_fp16)[name = string("vc_103_cast_fp16")]; tensor var_10515_perm_0 = const()[name = string("op_10515_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_10516_transpose_x_0 = const()[name = string("op_10516_transpose_x_0"), val = bool(false)]; bool var_10516_transpose_y_0 = const()[name = string("op_10516_transpose_y_0"), val = bool(false)]; tensor var_10515_cast_fp16 = transpose(perm = var_10515_perm_0, x = q_155_cast_fp16)[name = string("transpose_5")]; tensor var_10516_cast_fp16 = matmul(transpose_x = var_10516_transpose_x_0, transpose_y = var_10516_transpose_y_0, x = var_10515_cast_fp16, y = kc_103_cast_fp16)[name = string("op_10516_cast_fp16")]; fp16 _inversed_aw_201_y_0_to_fp16 = const()[name = string("_inversed_aw_201_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_201_cast_fp16 = mul(x = var_10516_cast_fp16, y = _inversed_aw_201_y_0_to_fp16)[name = string("_inversed_aw_201_cast_fp16")]; tensor aw_203_cast_fp16 = add(x = _inversed_aw_201_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_203_cast_fp16")]; int32 var_10530 = const()[name = string("op_10530"), val = int32(-1)]; tensor aw_207_cast_fp16 = softmax(axis = var_10530, x = aw_203_cast_fp16)[name = string("aw_207_cast_fp16")]; bool var_10536_transpose_x_1 = const()[name = string("op_10536_transpose_x_1"), val = bool(false)]; bool var_10536_transpose_y_1 = const()[name = string("op_10536_transpose_y_1"), val = bool(true)]; tensor var_10536_cast_fp16 = matmul(transpose_x = var_10536_transpose_x_1, transpose_y = var_10536_transpose_y_1, x = aw_207_cast_fp16, y = vc_103_cast_fp16)[name = string("op_10536_cast_fp16")]; tensor var_10539_perm_0 = const()[name = string("op_10539_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10543 = const()[name = string("op_10543"), val = tensor([1, 1, -1])]; tensor var_10539_cast_fp16 = transpose(perm = var_10539_perm_0, x = var_10536_cast_fp16)[name = string("transpose_4")]; tensor input_253_cast_fp16 = reshape(shape = var_10543, x = var_10539_cast_fp16)[name = string("input_253_cast_fp16")]; tensor layers_25_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397661312))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399758528))))[name = string("layers_25_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_178_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_self_attn_o_proj_weight_to_fp16_palettized, x = input_253_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor var_10549_axes_0 = const()[name = string("op_10549_axes_0"), val = tensor([0])]; tensor var_10549_cast_fp16 = squeeze(axes = var_10549_axes_0, x = linear_178_cast_fp16)[name = string("op_10549_cast_fp16")]; tensor var_10551_axes_0 = const()[name = string("op_10551_axes_0"), val = tensor([0])]; tensor var_10551_cast_fp16 = squeeze(axes = var_10551_axes_0, x = var_10549_cast_fp16)[name = string("op_10551_cast_fp16")]; tensor var_10553_axes_0 = const()[name = string("op_10553_axes_0"), val = tensor([-1])]; tensor var_10553_cast_fp16 = expand_dims(axes = var_10553_axes_0, x = var_10551_cast_fp16)[name = string("op_10553_cast_fp16")]; tensor ao_51_axes_0 = const()[name = string("ao_51_axes_0"), val = tensor([-1])]; tensor ao_51_cast_fp16 = expand_dims(axes = ao_51_axes_0, x = var_10553_cast_fp16)[name = string("ao_51_cast_fp16")]; tensor hidden_101_cast_fp16 = add(x = hidden_99_cast_fp16, y = ao_51_cast_fp16)[name = string("hidden_101_cast_fp16")]; tensor var_10559_axes_0 = const()[name = string("op_10559_axes_0"), val = tensor([-1])]; tensor var_10559_cast_fp16 = squeeze(axes = var_10559_axes_0, x = hidden_101_cast_fp16)[name = string("op_10559_cast_fp16")]; tensor var_10561_axes_0 = const()[name = string("op_10561_axes_0"), val = tensor([-1])]; tensor var_10561_cast_fp16 = squeeze(axes = var_10561_axes_0, x = var_10559_cast_fp16)[name = string("op_10561_cast_fp16")]; tensor hidden_states_413_axes_0 = const()[name = string("hidden_states_413_axes_0"), val = tensor([0])]; tensor hidden_states_413_cast_fp16 = expand_dims(axes = hidden_states_413_axes_0, x = var_10561_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; fp16 var_10567_promoted_to_fp16 = const()[name = string("op_10567_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10573_cast_fp16 = pow(x = hidden_states_413_cast_fp16, y = var_10567_promoted_to_fp16)[name = string("op_10573_cast_fp16")]; tensor variance_207_axes_0 = const()[name = string("variance_207_axes_0"), val = tensor([-1])]; bool variance_207_keep_dims_0 = const()[name = string("variance_207_keep_dims_0"), val = bool(true)]; tensor variance_207_cast_fp16 = reduce_mean(axes = variance_207_axes_0, keep_dims = variance_207_keep_dims_0, x = var_10573_cast_fp16)[name = string("variance_207_cast_fp16")]; tensor const_260_to_fp16 = const()[name = string("const_260_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399759104)))]; tensor var_10577_cast_fp16 = mul(x = const_260_to_fp16, y = hidden_states_413_cast_fp16)[name = string("op_10577_cast_fp16")]; fp16 var_10578_to_fp16 = const()[name = string("op_10578_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10579_cast_fp16 = add(x = variance_207_cast_fp16, y = var_10578_to_fp16)[name = string("op_10579_cast_fp16")]; fp32 var_10580_epsilon_0 = const()[name = string("op_10580_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10580_cast_fp16 = rsqrt(epsilon = var_10580_epsilon_0, x = var_10579_cast_fp16)[name = string("op_10580_cast_fp16")]; tensor input_255_cast_fp16 = mul(x = var_10577_cast_fp16, y = var_10580_cast_fp16)[name = string("input_255_cast_fp16")]; tensor layers_25_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399761216))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402907008))))[name = string("layers_25_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_gate_proj_weight_to_fp16_palettized, x = input_255_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor var_10588_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_10588_cast_fp16")]; tensor layers_25_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402907584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406053376))))[name = string("layers_25_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_25_mlp_up_proj_weight_to_fp16_palettized, x = input_255_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor input_259_cast_fp16 = mul(x = var_10588_cast_fp16, y = linear_180_cast_fp16)[name = string("input_259_cast_fp16")]; tensor layers_25_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406053952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409199744))))[name = string("layers_25_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_25_mlp_down_proj_weight_to_fp16_palettized, x = input_259_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor var_10595_axes_0 = const()[name = string("op_10595_axes_0"), val = tensor([0])]; tensor var_10595_cast_fp16 = squeeze(axes = var_10595_axes_0, x = linear_181_cast_fp16)[name = string("op_10595_cast_fp16")]; tensor var_10597_axes_0 = const()[name = string("op_10597_axes_0"), val = tensor([0])]; tensor var_10597_cast_fp16 = squeeze(axes = var_10597_axes_0, x = var_10595_cast_fp16)[name = string("op_10597_cast_fp16")]; tensor var_10599_axes_0 = const()[name = string("op_10599_axes_0"), val = tensor([-1])]; tensor var_10599_cast_fp16 = expand_dims(axes = var_10599_axes_0, x = var_10597_cast_fp16)[name = string("op_10599_cast_fp16")]; tensor h_51_axes_0 = const()[name = string("h_51_axes_0"), val = tensor([-1])]; tensor h_51_cast_fp16 = expand_dims(axes = h_51_axes_0, x = var_10599_cast_fp16)[name = string("h_51_cast_fp16")]; tensor hidden_103_cast_fp16 = add(x = hidden_101_cast_fp16, y = h_51_cast_fp16)[name = string("hidden_103_cast_fp16")]; tensor var_10613_begin_0 = const()[name = string("op_10613_begin_0"), val = tensor([0, 26624, 0, 0])]; tensor var_10613_end_0 = const()[name = string("op_10613_end_0"), val = tensor([1, 27648, 1, 256])]; tensor var_10613_end_mask_0 = const()[name = string("op_10613_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10613_cast_fp16 = slice_by_index(begin = var_10613_begin_0, end = var_10613_end_0, end_mask = var_10613_end_mask_0, x = key_cache)[name = string("op_10613_cast_fp16")]; tensor var_10633_begin_0 = const()[name = string("op_10633_begin_0"), val = tensor([0, 26624, 0, 0])]; tensor var_10633_end_0 = const()[name = string("op_10633_end_0"), val = tensor([1, 27648, 1, 256])]; tensor var_10633_end_mask_0 = const()[name = string("op_10633_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10633_cast_fp16 = slice_by_index(begin = var_10633_begin_0, end = var_10633_end_0, end_mask = var_10633_end_mask_0, x = value_cache)[name = string("op_10633_cast_fp16")]; tensor var_10645_axes_0 = const()[name = string("op_10645_axes_0"), val = tensor([-1])]; tensor var_10645_cast_fp16 = squeeze(axes = var_10645_axes_0, x = hidden_103_cast_fp16)[name = string("op_10645_cast_fp16")]; tensor var_10647_axes_0 = const()[name = string("op_10647_axes_0"), val = tensor([-1])]; tensor var_10647_cast_fp16 = squeeze(axes = var_10647_axes_0, x = var_10645_cast_fp16)[name = string("op_10647_cast_fp16")]; tensor hidden_states_417_axes_0 = const()[name = string("hidden_states_417_axes_0"), val = tensor([0])]; tensor hidden_states_417_cast_fp16 = expand_dims(axes = hidden_states_417_axes_0, x = var_10647_cast_fp16)[name = string("hidden_states_417_cast_fp16")]; fp16 var_10653_promoted_to_fp16 = const()[name = string("op_10653_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10659_cast_fp16 = pow(x = hidden_states_417_cast_fp16, y = var_10653_promoted_to_fp16)[name = string("op_10659_cast_fp16")]; tensor variance_209_axes_0 = const()[name = string("variance_209_axes_0"), val = tensor([-1])]; bool variance_209_keep_dims_0 = const()[name = string("variance_209_keep_dims_0"), val = bool(true)]; tensor variance_209_cast_fp16 = reduce_mean(axes = variance_209_axes_0, keep_dims = variance_209_keep_dims_0, x = var_10659_cast_fp16)[name = string("variance_209_cast_fp16")]; tensor const_261_to_fp16 = const()[name = string("const_261_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409200320)))]; tensor var_10663_cast_fp16 = mul(x = const_261_to_fp16, y = hidden_states_417_cast_fp16)[name = string("op_10663_cast_fp16")]; fp16 var_10664_to_fp16 = const()[name = string("op_10664_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10665_cast_fp16 = add(x = variance_209_cast_fp16, y = var_10664_to_fp16)[name = string("op_10665_cast_fp16")]; fp32 var_10666_epsilon_0 = const()[name = string("op_10666_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10666_cast_fp16 = rsqrt(epsilon = var_10666_epsilon_0, x = var_10665_cast_fp16)[name = string("op_10666_cast_fp16")]; tensor input_261_cast_fp16 = mul(x = var_10663_cast_fp16, y = var_10666_cast_fp16)[name = string("input_261_cast_fp16")]; tensor layers_26_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409202432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411299648))))[name = string("layers_26_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_26_self_attn_q_proj_weight_to_fp16_palettized, x = input_261_cast_fp16)[name = string("linear_182_cast_fp16")]; tensor var_10675 = const()[name = string("op_10675"), val = tensor([1, 1, 16, 128])]; tensor var_10676_cast_fp16 = reshape(shape = var_10675, x = linear_182_cast_fp16)[name = string("op_10676_cast_fp16")]; tensor layers_26_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411300224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412348864))))[name = string("layers_26_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_k_proj_weight_to_fp16_palettized, x = input_261_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor var_10687 = const()[name = string("op_10687"), val = tensor([1, 1, 8, 128])]; tensor var_10688_cast_fp16 = reshape(shape = var_10687, x = linear_183_cast_fp16)[name = string("op_10688_cast_fp16")]; tensor layers_26_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412349440))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398080))))[name = string("layers_26_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_v_proj_weight_to_fp16_palettized, x = input_261_cast_fp16)[name = string("linear_184_cast_fp16")]; fp16 var_10707_promoted_to_fp16 = const()[name = string("op_10707_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10713_cast_fp16 = pow(x = var_10676_cast_fp16, y = var_10707_promoted_to_fp16)[name = string("op_10713_cast_fp16")]; bool variance_211_keep_dims_0 = const()[name = string("variance_211_keep_dims_0"), val = bool(true)]; tensor const_386 = const()[name = string("const_386"), val = tensor([3])]; tensor variance_211_cast_fp16 = reduce_mean(axes = const_386, keep_dims = variance_211_keep_dims_0, x = var_10713_cast_fp16)[name = string("variance_211_cast_fp16")]; tensor const_387_to_fp16 = const()[name = string("const_387_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398656)))]; tensor var_10717_cast_fp16 = mul(x = const_387_to_fp16, y = var_10676_cast_fp16)[name = string("op_10717_cast_fp16")]; fp16 var_10718_to_fp16 = const()[name = string("op_10718_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10719_cast_fp16 = add(x = variance_211_cast_fp16, y = var_10718_to_fp16)[name = string("op_10719_cast_fp16")]; fp32 var_10720_epsilon_0 = const()[name = string("op_10720_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10720_cast_fp16 = rsqrt(epsilon = var_10720_epsilon_0, x = var_10719_cast_fp16)[name = string("op_10720_cast_fp16")]; tensor q_157_cast_fp16 = mul(x = var_10717_cast_fp16, y = var_10720_cast_fp16)[name = string("q_157_cast_fp16")]; fp16 var_10725_promoted_to_fp16 = const()[name = string("op_10725_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10731_cast_fp16 = pow(x = var_10688_cast_fp16, y = var_10725_promoted_to_fp16)[name = string("op_10731_cast_fp16")]; bool variance_213_keep_dims_0 = const()[name = string("variance_213_keep_dims_0"), val = bool(true)]; tensor const_388 = const()[name = string("const_388"), val = tensor([3])]; tensor variance_213_cast_fp16 = reduce_mean(axes = const_388, keep_dims = variance_213_keep_dims_0, x = var_10731_cast_fp16)[name = string("variance_213_cast_fp16")]; tensor const_389_to_fp16 = const()[name = string("const_389_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413398976)))]; tensor var_10735_cast_fp16 = mul(x = const_389_to_fp16, y = var_10688_cast_fp16)[name = string("op_10735_cast_fp16")]; fp16 var_10736_to_fp16 = const()[name = string("op_10736_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10737_cast_fp16 = add(x = variance_213_cast_fp16, y = var_10736_to_fp16)[name = string("op_10737_cast_fp16")]; fp32 var_10738_epsilon_0 = const()[name = string("op_10738_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10738_cast_fp16 = rsqrt(epsilon = var_10738_epsilon_0, x = var_10737_cast_fp16)[name = string("op_10738_cast_fp16")]; tensor k_157_cast_fp16 = mul(x = var_10735_cast_fp16, y = var_10738_cast_fp16)[name = string("k_157_cast_fp16")]; tensor var_10753_cast_fp16 = mul(x = q_157_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10753_cast_fp16")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_157_cast_fp16)[name = string("x1_105_cast_fp16")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_157_cast_fp16)[name = string("x2_105_cast_fp16")]; fp16 const_266_promoted_to_fp16 = const()[name = string("const_266_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10774_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_266_promoted_to_fp16)[name = string("op_10774_cast_fp16")]; int32 var_10776 = const()[name = string("op_10776"), val = int32(-1)]; bool var_10777_interleave_0 = const()[name = string("op_10777_interleave_0"), val = bool(false)]; tensor var_10777_cast_fp16 = concat(axis = var_10776, interleave = var_10777_interleave_0, values = (var_10774_cast_fp16, x1_105_cast_fp16))[name = string("op_10777_cast_fp16")]; tensor var_10778_cast_fp16 = mul(x = var_10777_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10778_cast_fp16")]; tensor q_161_cast_fp16 = add(x = var_10753_cast_fp16, y = var_10778_cast_fp16)[name = string("q_161_cast_fp16")]; tensor var_10781_cast_fp16 = mul(x = k_157_cast_fp16, y = cos_1_cast_fp16)[name = string("op_10781_cast_fp16")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_157_cast_fp16)[name = string("x1_107_cast_fp16")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_157_cast_fp16)[name = string("x2_107_cast_fp16")]; fp16 const_269_promoted_to_fp16 = const()[name = string("const_269_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_10802_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_269_promoted_to_fp16)[name = string("op_10802_cast_fp16")]; int32 var_10804 = const()[name = string("op_10804"), val = int32(-1)]; bool var_10805_interleave_0 = const()[name = string("op_10805_interleave_0"), val = bool(false)]; tensor var_10805_cast_fp16 = concat(axis = var_10804, interleave = var_10805_interleave_0, values = (var_10802_cast_fp16, x1_107_cast_fp16))[name = string("op_10805_cast_fp16")]; tensor var_10806_cast_fp16 = mul(x = var_10805_cast_fp16, y = sin_1_cast_fp16)[name = string("op_10806_cast_fp16")]; tensor k_161_cast_fp16 = add(x = var_10781_cast_fp16, y = var_10806_cast_fp16)[name = string("k_161_cast_fp16")]; tensor var_10813 = const()[name = string("op_10813"), val = tensor([1, 1024, 1, 1])]; tensor nk_53_cast_fp16 = reshape(shape = var_10813, x = k_161_cast_fp16)[name = string("nk_53_cast_fp16")]; tensor var_10819 = const()[name = string("op_10819"), val = tensor([1, 1024, 1, 1])]; tensor nv_53_cast_fp16 = reshape(shape = var_10819, x = linear_184_cast_fp16)[name = string("nv_53_cast_fp16")]; tensor var_10824_cast_fp16 = mul(x = var_10613_cast_fp16, y = var_1203_cast_fp16)[name = string("op_10824_cast_fp16")]; tensor var_10825_cast_fp16 = mul(x = nk_53_cast_fp16, y = update_mask_cast_fp16)[name = string("op_10825_cast_fp16")]; tensor lkc_107_cast_fp16 = add(x = var_10824_cast_fp16, y = var_10825_cast_fp16)[name = string("lkc_107_cast_fp16")]; tensor var_10831_cast_fp16 = mul(x = var_10633_cast_fp16, y = var_1203_cast_fp16)[name = string("op_10831_cast_fp16")]; tensor var_10832_cast_fp16 = mul(x = nv_53_cast_fp16, y = update_mask_cast_fp16)[name = string("op_10832_cast_fp16")]; tensor lvc_107_cast_fp16 = add(x = var_10831_cast_fp16, y = var_10832_cast_fp16)[name = string("lvc_107_cast_fp16")]; tensor var_10836_axes_0 = const()[name = string("op_10836_axes_0"), val = tensor([2])]; tensor var_10836_cast_fp16 = squeeze(axes = var_10836_axes_0, x = lkc_107_cast_fp16)[name = string("op_10836_cast_fp16")]; tensor var_10841 = const()[name = string("op_10841"), val = tensor([1, 8, 128, 256])]; tensor kc_105_cast_fp16 = reshape(shape = var_10841, x = var_10836_cast_fp16)[name = string("kc_105_cast_fp16")]; tensor var_10844_axes_0 = const()[name = string("op_10844_axes_0"), val = tensor([2])]; tensor var_10844_cast_fp16 = squeeze(axes = var_10844_axes_0, x = lvc_107_cast_fp16)[name = string("op_10844_cast_fp16")]; tensor var_10849 = const()[name = string("op_10849"), val = tensor([1, 8, 128, 256])]; tensor vc_105_cast_fp16 = reshape(shape = var_10849, x = var_10844_cast_fp16)[name = string("vc_105_cast_fp16")]; tensor var_10852_axes_0 = const()[name = string("op_10852_axes_0"), val = tensor([2])]; tensor var_10852_cast_fp16 = expand_dims(axes = var_10852_axes_0, x = kc_105_cast_fp16)[name = string("op_10852_cast_fp16")]; tensor var_10860_reps_0 = const()[name = string("op_10860_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10860_cast_fp16 = tile(reps = var_10860_reps_0, x = var_10852_cast_fp16)[name = string("op_10860_cast_fp16")]; tensor var_10865 = const()[name = string("op_10865"), val = tensor([1, 16, 128, 256])]; tensor kc_107_cast_fp16 = reshape(shape = var_10865, x = var_10860_cast_fp16)[name = string("kc_107_cast_fp16")]; tensor var_10868_axes_0 = const()[name = string("op_10868_axes_0"), val = tensor([2])]; tensor var_10868_cast_fp16 = expand_dims(axes = var_10868_axes_0, x = vc_105_cast_fp16)[name = string("op_10868_cast_fp16")]; tensor var_10876_reps_0 = const()[name = string("op_10876_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_10876_cast_fp16 = tile(reps = var_10876_reps_0, x = var_10868_cast_fp16)[name = string("op_10876_cast_fp16")]; tensor var_10881 = const()[name = string("op_10881"), val = tensor([1, 16, 128, 256])]; tensor vc_107_cast_fp16 = reshape(shape = var_10881, x = var_10876_cast_fp16)[name = string("vc_107_cast_fp16")]; tensor var_10885_perm_0 = const()[name = string("op_10885_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_10886_transpose_x_0 = const()[name = string("op_10886_transpose_x_0"), val = bool(false)]; bool var_10886_transpose_y_0 = const()[name = string("op_10886_transpose_y_0"), val = bool(false)]; tensor var_10885_cast_fp16 = transpose(perm = var_10885_perm_0, x = q_161_cast_fp16)[name = string("transpose_3")]; tensor var_10886_cast_fp16 = matmul(transpose_x = var_10886_transpose_x_0, transpose_y = var_10886_transpose_y_0, x = var_10885_cast_fp16, y = kc_107_cast_fp16)[name = string("op_10886_cast_fp16")]; fp16 _inversed_aw_209_y_0_to_fp16 = const()[name = string("_inversed_aw_209_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_209_cast_fp16 = mul(x = var_10886_cast_fp16, y = _inversed_aw_209_y_0_to_fp16)[name = string("_inversed_aw_209_cast_fp16")]; tensor aw_211_cast_fp16 = add(x = _inversed_aw_209_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_211_cast_fp16")]; int32 var_10900 = const()[name = string("op_10900"), val = int32(-1)]; tensor aw_215_cast_fp16 = softmax(axis = var_10900, x = aw_211_cast_fp16)[name = string("aw_215_cast_fp16")]; bool var_10906_transpose_x_1 = const()[name = string("op_10906_transpose_x_1"), val = bool(false)]; bool var_10906_transpose_y_1 = const()[name = string("op_10906_transpose_y_1"), val = bool(true)]; tensor var_10906_cast_fp16 = matmul(transpose_x = var_10906_transpose_x_1, transpose_y = var_10906_transpose_y_1, x = aw_215_cast_fp16, y = vc_107_cast_fp16)[name = string("op_10906_cast_fp16")]; tensor var_10909_perm_0 = const()[name = string("op_10909_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_10913 = const()[name = string("op_10913"), val = tensor([1, 1, -1])]; tensor var_10909_cast_fp16 = transpose(perm = var_10909_perm_0, x = var_10906_cast_fp16)[name = string("transpose_2")]; tensor input_263_cast_fp16 = reshape(shape = var_10913, x = var_10909_cast_fp16)[name = string("input_263_cast_fp16")]; tensor layers_26_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413399296))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415496512))))[name = string("layers_26_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_self_attn_o_proj_weight_to_fp16_palettized, x = input_263_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor var_10919_axes_0 = const()[name = string("op_10919_axes_0"), val = tensor([0])]; tensor var_10919_cast_fp16 = squeeze(axes = var_10919_axes_0, x = linear_185_cast_fp16)[name = string("op_10919_cast_fp16")]; tensor var_10921_axes_0 = const()[name = string("op_10921_axes_0"), val = tensor([0])]; tensor var_10921_cast_fp16 = squeeze(axes = var_10921_axes_0, x = var_10919_cast_fp16)[name = string("op_10921_cast_fp16")]; tensor var_10923_axes_0 = const()[name = string("op_10923_axes_0"), val = tensor([-1])]; tensor var_10923_cast_fp16 = expand_dims(axes = var_10923_axes_0, x = var_10921_cast_fp16)[name = string("op_10923_cast_fp16")]; tensor ao_53_axes_0 = const()[name = string("ao_53_axes_0"), val = tensor([-1])]; tensor ao_53_cast_fp16 = expand_dims(axes = ao_53_axes_0, x = var_10923_cast_fp16)[name = string("ao_53_cast_fp16")]; tensor hidden_105_cast_fp16 = add(x = hidden_103_cast_fp16, y = ao_53_cast_fp16)[name = string("hidden_105_cast_fp16")]; tensor var_10929_axes_0 = const()[name = string("op_10929_axes_0"), val = tensor([-1])]; tensor var_10929_cast_fp16 = squeeze(axes = var_10929_axes_0, x = hidden_105_cast_fp16)[name = string("op_10929_cast_fp16")]; tensor var_10931_axes_0 = const()[name = string("op_10931_axes_0"), val = tensor([-1])]; tensor var_10931_cast_fp16 = squeeze(axes = var_10931_axes_0, x = var_10929_cast_fp16)[name = string("op_10931_cast_fp16")]; tensor hidden_states_429_axes_0 = const()[name = string("hidden_states_429_axes_0"), val = tensor([0])]; tensor hidden_states_429_cast_fp16 = expand_dims(axes = hidden_states_429_axes_0, x = var_10931_cast_fp16)[name = string("hidden_states_429_cast_fp16")]; fp16 var_10937_promoted_to_fp16 = const()[name = string("op_10937_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_10943_cast_fp16 = pow(x = hidden_states_429_cast_fp16, y = var_10937_promoted_to_fp16)[name = string("op_10943_cast_fp16")]; tensor variance_215_axes_0 = const()[name = string("variance_215_axes_0"), val = tensor([-1])]; bool variance_215_keep_dims_0 = const()[name = string("variance_215_keep_dims_0"), val = bool(true)]; tensor variance_215_cast_fp16 = reduce_mean(axes = variance_215_axes_0, keep_dims = variance_215_keep_dims_0, x = var_10943_cast_fp16)[name = string("variance_215_cast_fp16")]; tensor const_270_to_fp16 = const()[name = string("const_270_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415497088)))]; tensor var_10947_cast_fp16 = mul(x = const_270_to_fp16, y = hidden_states_429_cast_fp16)[name = string("op_10947_cast_fp16")]; fp16 var_10948_to_fp16 = const()[name = string("op_10948_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_10949_cast_fp16 = add(x = variance_215_cast_fp16, y = var_10948_to_fp16)[name = string("op_10949_cast_fp16")]; fp32 var_10950_epsilon_0 = const()[name = string("op_10950_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_10950_cast_fp16 = rsqrt(epsilon = var_10950_epsilon_0, x = var_10949_cast_fp16)[name = string("op_10950_cast_fp16")]; tensor input_265_cast_fp16 = mul(x = var_10947_cast_fp16, y = var_10950_cast_fp16)[name = string("input_265_cast_fp16")]; tensor layers_26_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415499200))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418644992))))[name = string("layers_26_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_gate_proj_weight_to_fp16_palettized, x = input_265_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_10958_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_10958_cast_fp16")]; tensor layers_26_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418645568))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421791360))))[name = string("layers_26_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_26_mlp_up_proj_weight_to_fp16_palettized, x = input_265_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor input_269_cast_fp16 = mul(x = var_10958_cast_fp16, y = linear_187_cast_fp16)[name = string("input_269_cast_fp16")]; tensor layers_26_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421791936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424937728))))[name = string("layers_26_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_26_mlp_down_proj_weight_to_fp16_palettized, x = input_269_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor var_10965_axes_0 = const()[name = string("op_10965_axes_0"), val = tensor([0])]; tensor var_10965_cast_fp16 = squeeze(axes = var_10965_axes_0, x = linear_188_cast_fp16)[name = string("op_10965_cast_fp16")]; tensor var_10967_axes_0 = const()[name = string("op_10967_axes_0"), val = tensor([0])]; tensor var_10967_cast_fp16 = squeeze(axes = var_10967_axes_0, x = var_10965_cast_fp16)[name = string("op_10967_cast_fp16")]; tensor var_10969_axes_0 = const()[name = string("op_10969_axes_0"), val = tensor([-1])]; tensor var_10969_cast_fp16 = expand_dims(axes = var_10969_axes_0, x = var_10967_cast_fp16)[name = string("op_10969_cast_fp16")]; tensor h_53_axes_0 = const()[name = string("h_53_axes_0"), val = tensor([-1])]; tensor h_53_cast_fp16 = expand_dims(axes = h_53_axes_0, x = var_10969_cast_fp16)[name = string("h_53_cast_fp16")]; tensor hidden_107_cast_fp16 = add(x = hidden_105_cast_fp16, y = h_53_cast_fp16)[name = string("hidden_107_cast_fp16")]; tensor var_10983_begin_0 = const()[name = string("op_10983_begin_0"), val = tensor([0, 27648, 0, 0])]; tensor var_10983_end_0 = const()[name = string("op_10983_end_0"), val = tensor([1, 1, 1, 256])]; tensor var_10983_end_mask_0 = const()[name = string("op_10983_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_10983_cast_fp16 = slice_by_index(begin = var_10983_begin_0, end = var_10983_end_0, end_mask = var_10983_end_mask_0, x = key_cache)[name = string("op_10983_cast_fp16")]; tensor var_11003_begin_0 = const()[name = string("op_11003_begin_0"), val = tensor([0, 27648, 0, 0])]; tensor var_11003_end_0 = const()[name = string("op_11003_end_0"), val = tensor([1, 1, 1, 256])]; tensor var_11003_end_mask_0 = const()[name = string("op_11003_end_mask_0"), val = tensor([true, true, true, true])]; tensor var_11003_cast_fp16 = slice_by_index(begin = var_11003_begin_0, end = var_11003_end_0, end_mask = var_11003_end_mask_0, x = value_cache)[name = string("op_11003_cast_fp16")]; tensor var_11015_axes_0 = const()[name = string("op_11015_axes_0"), val = tensor([-1])]; tensor var_11015_cast_fp16 = squeeze(axes = var_11015_axes_0, x = hidden_107_cast_fp16)[name = string("op_11015_cast_fp16")]; tensor var_11017_axes_0 = const()[name = string("op_11017_axes_0"), val = tensor([-1])]; tensor var_11017_cast_fp16 = squeeze(axes = var_11017_axes_0, x = var_11015_cast_fp16)[name = string("op_11017_cast_fp16")]; tensor hidden_states_433_axes_0 = const()[name = string("hidden_states_433_axes_0"), val = tensor([0])]; tensor hidden_states_433_cast_fp16 = expand_dims(axes = hidden_states_433_axes_0, x = var_11017_cast_fp16)[name = string("hidden_states_433_cast_fp16")]; fp16 var_11023_promoted_to_fp16 = const()[name = string("op_11023_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11029_cast_fp16 = pow(x = hidden_states_433_cast_fp16, y = var_11023_promoted_to_fp16)[name = string("op_11029_cast_fp16")]; tensor variance_217_axes_0 = const()[name = string("variance_217_axes_0"), val = tensor([-1])]; bool variance_217_keep_dims_0 = const()[name = string("variance_217_keep_dims_0"), val = bool(true)]; tensor variance_217_cast_fp16 = reduce_mean(axes = variance_217_axes_0, keep_dims = variance_217_keep_dims_0, x = var_11029_cast_fp16)[name = string("variance_217_cast_fp16")]; tensor const_271_to_fp16 = const()[name = string("const_271_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424938304)))]; tensor var_11033_cast_fp16 = mul(x = const_271_to_fp16, y = hidden_states_433_cast_fp16)[name = string("op_11033_cast_fp16")]; fp16 var_11034_to_fp16 = const()[name = string("op_11034_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11035_cast_fp16 = add(x = variance_217_cast_fp16, y = var_11034_to_fp16)[name = string("op_11035_cast_fp16")]; fp32 var_11036_epsilon_0 = const()[name = string("op_11036_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11036_cast_fp16 = rsqrt(epsilon = var_11036_epsilon_0, x = var_11035_cast_fp16)[name = string("op_11036_cast_fp16")]; tensor input_271_cast_fp16 = mul(x = var_11033_cast_fp16, y = var_11036_cast_fp16)[name = string("input_271_cast_fp16")]; tensor layers_27_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424940416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427037632))))[name = string("layers_27_self_attn_q_proj_weight_to_fp16_palettized")]; tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = layers_27_self_attn_q_proj_weight_to_fp16_palettized, x = input_271_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor var_11045 = const()[name = string("op_11045"), val = tensor([1, 1, 16, 128])]; tensor var_11046_cast_fp16 = reshape(shape = var_11045, x = linear_189_cast_fp16)[name = string("op_11046_cast_fp16")]; tensor layers_27_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427038208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428086848))))[name = string("layers_27_self_attn_k_proj_weight_to_fp16_palettized")]; tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_k_proj_weight_to_fp16_palettized, x = input_271_cast_fp16)[name = string("linear_190_cast_fp16")]; tensor var_11057 = const()[name = string("op_11057"), val = tensor([1, 1, 8, 128])]; tensor var_11058_cast_fp16 = reshape(shape = var_11057, x = linear_190_cast_fp16)[name = string("op_11058_cast_fp16")]; tensor layers_27_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428087424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136064))))[name = string("layers_27_self_attn_v_proj_weight_to_fp16_palettized")]; tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_v_proj_weight_to_fp16_palettized, x = input_271_cast_fp16)[name = string("linear_191_cast_fp16")]; fp16 var_11077_promoted_to_fp16 = const()[name = string("op_11077_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11083_cast_fp16 = pow(x = var_11046_cast_fp16, y = var_11077_promoted_to_fp16)[name = string("op_11083_cast_fp16")]; bool variance_219_keep_dims_0 = const()[name = string("variance_219_keep_dims_0"), val = bool(true)]; tensor const_390 = const()[name = string("const_390"), val = tensor([3])]; tensor variance_219_cast_fp16 = reduce_mean(axes = const_390, keep_dims = variance_219_keep_dims_0, x = var_11083_cast_fp16)[name = string("variance_219_cast_fp16")]; tensor const_391_to_fp16 = const()[name = string("const_391_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136640)))]; tensor var_11087_cast_fp16 = mul(x = const_391_to_fp16, y = var_11046_cast_fp16)[name = string("op_11087_cast_fp16")]; fp16 var_11088_to_fp16 = const()[name = string("op_11088_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11089_cast_fp16 = add(x = variance_219_cast_fp16, y = var_11088_to_fp16)[name = string("op_11089_cast_fp16")]; fp32 var_11090_epsilon_0 = const()[name = string("op_11090_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11090_cast_fp16 = rsqrt(epsilon = var_11090_epsilon_0, x = var_11089_cast_fp16)[name = string("op_11090_cast_fp16")]; tensor q_163_cast_fp16 = mul(x = var_11087_cast_fp16, y = var_11090_cast_fp16)[name = string("q_163_cast_fp16")]; fp16 var_11095_promoted_to_fp16 = const()[name = string("op_11095_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11101_cast_fp16 = pow(x = var_11058_cast_fp16, y = var_11095_promoted_to_fp16)[name = string("op_11101_cast_fp16")]; bool variance_221_keep_dims_0 = const()[name = string("variance_221_keep_dims_0"), val = bool(true)]; tensor const_392 = const()[name = string("const_392"), val = tensor([3])]; tensor variance_221_cast_fp16 = reduce_mean(axes = const_392, keep_dims = variance_221_keep_dims_0, x = var_11101_cast_fp16)[name = string("variance_221_cast_fp16")]; tensor const_393_to_fp16 = const()[name = string("const_393_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429136960)))]; tensor var_11105_cast_fp16 = mul(x = const_393_to_fp16, y = var_11058_cast_fp16)[name = string("op_11105_cast_fp16")]; fp16 var_11106_to_fp16 = const()[name = string("op_11106_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11107_cast_fp16 = add(x = variance_221_cast_fp16, y = var_11106_to_fp16)[name = string("op_11107_cast_fp16")]; fp32 var_11108_epsilon_0 = const()[name = string("op_11108_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11108_cast_fp16 = rsqrt(epsilon = var_11108_epsilon_0, x = var_11107_cast_fp16)[name = string("op_11108_cast_fp16")]; tensor k_163_cast_fp16 = mul(x = var_11105_cast_fp16, y = var_11108_cast_fp16)[name = string("k_163_cast_fp16")]; tensor var_11123_cast_fp16 = mul(x = q_163_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11123_cast_fp16")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 1, 16, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_163_cast_fp16)[name = string("x1_109_cast_fp16")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 1, 16, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_163_cast_fp16)[name = string("x2_109_cast_fp16")]; fp16 const_276_promoted_to_fp16 = const()[name = string("const_276_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11144_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_276_promoted_to_fp16)[name = string("op_11144_cast_fp16")]; int32 var_11146 = const()[name = string("op_11146"), val = int32(-1)]; bool var_11147_interleave_0 = const()[name = string("op_11147_interleave_0"), val = bool(false)]; tensor var_11147_cast_fp16 = concat(axis = var_11146, interleave = var_11147_interleave_0, values = (var_11144_cast_fp16, x1_109_cast_fp16))[name = string("op_11147_cast_fp16")]; tensor var_11148_cast_fp16 = mul(x = var_11147_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11148_cast_fp16")]; tensor q_cast_fp16 = add(x = var_11123_cast_fp16, y = var_11148_cast_fp16)[name = string("q_cast_fp16")]; tensor var_11151_cast_fp16 = mul(x = k_163_cast_fp16, y = cos_1_cast_fp16)[name = string("op_11151_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 1, 8, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_163_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 1, 8, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_163_cast_fp16)[name = string("x2_cast_fp16")]; fp16 const_279_promoted_to_fp16 = const()[name = string("const_279_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_11172_cast_fp16 = mul(x = x2_cast_fp16, y = const_279_promoted_to_fp16)[name = string("op_11172_cast_fp16")]; int32 var_11174 = const()[name = string("op_11174"), val = int32(-1)]; bool var_11175_interleave_0 = const()[name = string("op_11175_interleave_0"), val = bool(false)]; tensor var_11175_cast_fp16 = concat(axis = var_11174, interleave = var_11175_interleave_0, values = (var_11172_cast_fp16, x1_cast_fp16))[name = string("op_11175_cast_fp16")]; tensor var_11176_cast_fp16 = mul(x = var_11175_cast_fp16, y = sin_1_cast_fp16)[name = string("op_11176_cast_fp16")]; tensor k_cast_fp16 = add(x = var_11151_cast_fp16, y = var_11176_cast_fp16)[name = string("k_cast_fp16")]; tensor var_11183 = const()[name = string("op_11183"), val = tensor([1, 1024, 1, 1])]; tensor nk_cast_fp16 = reshape(shape = var_11183, x = k_cast_fp16)[name = string("nk_cast_fp16")]; tensor var_11189 = const()[name = string("op_11189"), val = tensor([1, 1024, 1, 1])]; tensor nv_cast_fp16 = reshape(shape = var_11189, x = linear_191_cast_fp16)[name = string("nv_cast_fp16")]; tensor var_11194_cast_fp16 = mul(x = var_10983_cast_fp16, y = var_1203_cast_fp16)[name = string("op_11194_cast_fp16")]; tensor var_11195_cast_fp16 = mul(x = nk_cast_fp16, y = update_mask_cast_fp16)[name = string("op_11195_cast_fp16")]; tensor lkc_cast_fp16 = add(x = var_11194_cast_fp16, y = var_11195_cast_fp16)[name = string("lkc_cast_fp16")]; tensor var_11201_cast_fp16 = mul(x = var_11003_cast_fp16, y = var_1203_cast_fp16)[name = string("op_11201_cast_fp16")]; tensor var_11202_cast_fp16 = mul(x = nv_cast_fp16, y = update_mask_cast_fp16)[name = string("op_11202_cast_fp16")]; tensor lvc_cast_fp16 = add(x = var_11201_cast_fp16, y = var_11202_cast_fp16)[name = string("lvc_cast_fp16")]; tensor var_11206_axes_0 = const()[name = string("op_11206_axes_0"), val = tensor([2])]; tensor var_11206_cast_fp16 = squeeze(axes = var_11206_axes_0, x = lkc_cast_fp16)[name = string("op_11206_cast_fp16")]; tensor var_11211 = const()[name = string("op_11211"), val = tensor([1, 8, 128, 256])]; tensor kc_109_cast_fp16 = reshape(shape = var_11211, x = var_11206_cast_fp16)[name = string("kc_109_cast_fp16")]; tensor var_11214_axes_0 = const()[name = string("op_11214_axes_0"), val = tensor([2])]; tensor var_11214_cast_fp16 = squeeze(axes = var_11214_axes_0, x = lvc_cast_fp16)[name = string("op_11214_cast_fp16")]; tensor var_11219 = const()[name = string("op_11219"), val = tensor([1, 8, 128, 256])]; tensor vc_109_cast_fp16 = reshape(shape = var_11219, x = var_11214_cast_fp16)[name = string("vc_109_cast_fp16")]; tensor var_11222_axes_0 = const()[name = string("op_11222_axes_0"), val = tensor([2])]; tensor var_11222_cast_fp16 = expand_dims(axes = var_11222_axes_0, x = kc_109_cast_fp16)[name = string("op_11222_cast_fp16")]; tensor var_11230_reps_0 = const()[name = string("op_11230_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_11230_cast_fp16 = tile(reps = var_11230_reps_0, x = var_11222_cast_fp16)[name = string("op_11230_cast_fp16")]; tensor var_11235 = const()[name = string("op_11235"), val = tensor([1, 16, 128, 256])]; tensor kc_cast_fp16 = reshape(shape = var_11235, x = var_11230_cast_fp16)[name = string("kc_cast_fp16")]; tensor var_11238_axes_0 = const()[name = string("op_11238_axes_0"), val = tensor([2])]; tensor var_11238_cast_fp16 = expand_dims(axes = var_11238_axes_0, x = vc_109_cast_fp16)[name = string("op_11238_cast_fp16")]; tensor var_11246_reps_0 = const()[name = string("op_11246_reps_0"), val = tensor([1, 1, 2, 1, 1])]; tensor var_11246_cast_fp16 = tile(reps = var_11246_reps_0, x = var_11238_cast_fp16)[name = string("op_11246_cast_fp16")]; tensor var_11251 = const()[name = string("op_11251"), val = tensor([1, 16, 128, 256])]; tensor vc_cast_fp16 = reshape(shape = var_11251, x = var_11246_cast_fp16)[name = string("vc_cast_fp16")]; tensor var_11255_perm_0 = const()[name = string("op_11255_perm_0"), val = tensor([0, 2, -3, -1])]; bool var_11256_transpose_x_0 = const()[name = string("op_11256_transpose_x_0"), val = bool(false)]; bool var_11256_transpose_y_0 = const()[name = string("op_11256_transpose_y_0"), val = bool(false)]; tensor var_11255_cast_fp16 = transpose(perm = var_11255_perm_0, x = q_cast_fp16)[name = string("transpose_1")]; tensor var_11256_cast_fp16 = matmul(transpose_x = var_11256_transpose_x_0, transpose_y = var_11256_transpose_y_0, x = var_11255_cast_fp16, y = kc_cast_fp16)[name = string("op_11256_cast_fp16")]; fp16 _inversed_aw_217_y_0_to_fp16 = const()[name = string("_inversed_aw_217_y_0_to_fp16"), val = fp16(0x1.6ap-4)]; tensor _inversed_aw_217_cast_fp16 = mul(x = var_11256_cast_fp16, y = _inversed_aw_217_y_0_to_fp16)[name = string("_inversed_aw_217_cast_fp16")]; tensor aw_219_cast_fp16 = add(x = _inversed_aw_217_cast_fp16, y = var_1272_cast_fp16)[name = string("aw_219_cast_fp16")]; int32 var_11270 = const()[name = string("op_11270"), val = int32(-1)]; tensor aw_cast_fp16 = softmax(axis = var_11270, x = aw_219_cast_fp16)[name = string("aw_cast_fp16")]; bool var_11276_transpose_x_1 = const()[name = string("op_11276_transpose_x_1"), val = bool(false)]; bool var_11276_transpose_y_1 = const()[name = string("op_11276_transpose_y_1"), val = bool(true)]; tensor var_11276_cast_fp16 = matmul(transpose_x = var_11276_transpose_x_1, transpose_y = var_11276_transpose_y_1, x = aw_cast_fp16, y = vc_cast_fp16)[name = string("op_11276_cast_fp16")]; tensor var_11279_perm_0 = const()[name = string("op_11279_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_11283 = const()[name = string("op_11283"), val = tensor([1, 1, -1])]; tensor var_11279_cast_fp16 = transpose(perm = var_11279_perm_0, x = var_11276_cast_fp16)[name = string("transpose_0")]; tensor input_273_cast_fp16 = reshape(shape = var_11283, x = var_11279_cast_fp16)[name = string("input_273_cast_fp16")]; tensor layers_27_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429137280))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431234496))))[name = string("layers_27_self_attn_o_proj_weight_to_fp16_palettized")]; tensor linear_192_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_self_attn_o_proj_weight_to_fp16_palettized, x = input_273_cast_fp16)[name = string("linear_192_cast_fp16")]; tensor var_11289_axes_0 = const()[name = string("op_11289_axes_0"), val = tensor([0])]; tensor var_11289_cast_fp16 = squeeze(axes = var_11289_axes_0, x = linear_192_cast_fp16)[name = string("op_11289_cast_fp16")]; tensor var_11291_axes_0 = const()[name = string("op_11291_axes_0"), val = tensor([0])]; tensor var_11291_cast_fp16 = squeeze(axes = var_11291_axes_0, x = var_11289_cast_fp16)[name = string("op_11291_cast_fp16")]; tensor var_11293_axes_0 = const()[name = string("op_11293_axes_0"), val = tensor([-1])]; tensor var_11293_cast_fp16 = expand_dims(axes = var_11293_axes_0, x = var_11291_cast_fp16)[name = string("op_11293_cast_fp16")]; tensor ao_axes_0 = const()[name = string("ao_axes_0"), val = tensor([-1])]; tensor ao_cast_fp16 = expand_dims(axes = ao_axes_0, x = var_11293_cast_fp16)[name = string("ao_cast_fp16")]; tensor hidden_109_cast_fp16 = add(x = hidden_107_cast_fp16, y = ao_cast_fp16)[name = string("hidden_109_cast_fp16")]; tensor var_11299_axes_0 = const()[name = string("op_11299_axes_0"), val = tensor([-1])]; tensor var_11299_cast_fp16 = squeeze(axes = var_11299_axes_0, x = hidden_109_cast_fp16)[name = string("op_11299_cast_fp16")]; tensor var_11301_axes_0 = const()[name = string("op_11301_axes_0"), val = tensor([-1])]; tensor var_11301_cast_fp16 = squeeze(axes = var_11301_axes_0, x = var_11299_cast_fp16)[name = string("op_11301_cast_fp16")]; tensor hidden_states_445_axes_0 = const()[name = string("hidden_states_445_axes_0"), val = tensor([0])]; tensor hidden_states_445_cast_fp16 = expand_dims(axes = hidden_states_445_axes_0, x = var_11301_cast_fp16)[name = string("hidden_states_445_cast_fp16")]; fp16 var_11307_promoted_to_fp16 = const()[name = string("op_11307_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11313_cast_fp16 = pow(x = hidden_states_445_cast_fp16, y = var_11307_promoted_to_fp16)[name = string("op_11313_cast_fp16")]; tensor variance_223_axes_0 = const()[name = string("variance_223_axes_0"), val = tensor([-1])]; bool variance_223_keep_dims_0 = const()[name = string("variance_223_keep_dims_0"), val = bool(true)]; tensor variance_223_cast_fp16 = reduce_mean(axes = variance_223_axes_0, keep_dims = variance_223_keep_dims_0, x = var_11313_cast_fp16)[name = string("variance_223_cast_fp16")]; tensor const_280_to_fp16 = const()[name = string("const_280_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431235072)))]; tensor var_11317_cast_fp16 = mul(x = const_280_to_fp16, y = hidden_states_445_cast_fp16)[name = string("op_11317_cast_fp16")]; fp16 var_11318_to_fp16 = const()[name = string("op_11318_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11319_cast_fp16 = add(x = variance_223_cast_fp16, y = var_11318_to_fp16)[name = string("op_11319_cast_fp16")]; fp32 var_11320_epsilon_0 = const()[name = string("op_11320_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11320_cast_fp16 = rsqrt(epsilon = var_11320_epsilon_0, x = var_11319_cast_fp16)[name = string("op_11320_cast_fp16")]; tensor input_275_cast_fp16 = mul(x = var_11317_cast_fp16, y = var_11320_cast_fp16)[name = string("input_275_cast_fp16")]; tensor layers_27_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431237184))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434382976))))[name = string("layers_27_mlp_gate_proj_weight_to_fp16_palettized")]; tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_gate_proj_weight_to_fp16_palettized, x = input_275_cast_fp16)[name = string("linear_193_cast_fp16")]; tensor var_11328_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_11328_cast_fp16")]; tensor layers_27_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(434383552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437529344))))[name = string("layers_27_mlp_up_proj_weight_to_fp16_palettized")]; tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = layers_27_mlp_up_proj_weight_to_fp16_palettized, x = input_275_cast_fp16)[name = string("linear_194_cast_fp16")]; tensor input_279_cast_fp16 = mul(x = var_11328_cast_fp16, y = linear_194_cast_fp16)[name = string("input_279_cast_fp16")]; tensor layers_27_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437529920))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440675712))))[name = string("layers_27_mlp_down_proj_weight_to_fp16_palettized")]; tensor linear_195_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = layers_27_mlp_down_proj_weight_to_fp16_palettized, x = input_279_cast_fp16)[name = string("linear_195_cast_fp16")]; tensor var_11335_axes_0 = const()[name = string("op_11335_axes_0"), val = tensor([0])]; tensor var_11335_cast_fp16 = squeeze(axes = var_11335_axes_0, x = linear_195_cast_fp16)[name = string("op_11335_cast_fp16")]; tensor var_11337_axes_0 = const()[name = string("op_11337_axes_0"), val = tensor([0])]; tensor var_11337_cast_fp16 = squeeze(axes = var_11337_axes_0, x = var_11335_cast_fp16)[name = string("op_11337_cast_fp16")]; tensor var_11339_axes_0 = const()[name = string("op_11339_axes_0"), val = tensor([-1])]; tensor var_11339_cast_fp16 = expand_dims(axes = var_11339_axes_0, x = var_11337_cast_fp16)[name = string("op_11339_cast_fp16")]; tensor h_axes_0 = const()[name = string("h_axes_0"), val = tensor([-1])]; tensor h_cast_fp16 = expand_dims(axes = h_axes_0, x = var_11339_cast_fp16)[name = string("h_cast_fp16")]; tensor hidden_cast_fp16 = add(x = hidden_109_cast_fp16, y = h_cast_fp16)[name = string("hidden_cast_fp16")]; tensor var_11345_axes_0 = const()[name = string("op_11345_axes_0"), val = tensor([-1])]; tensor var_11345_cast_fp16 = squeeze(axes = var_11345_axes_0, x = hidden_cast_fp16)[name = string("op_11345_cast_fp16")]; tensor var_11347_axes_0 = const()[name = string("op_11347_axes_0"), val = tensor([-1])]; tensor var_11347_cast_fp16 = squeeze(axes = var_11347_axes_0, x = var_11345_cast_fp16)[name = string("op_11347_cast_fp16")]; tensor hidden_states_449_axes_0 = const()[name = string("hidden_states_449_axes_0"), val = tensor([0])]; tensor hidden_states_449_cast_fp16 = expand_dims(axes = hidden_states_449_axes_0, x = var_11347_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; fp16 var_11353_promoted_to_fp16 = const()[name = string("op_11353_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_11359_cast_fp16 = pow(x = hidden_states_449_cast_fp16, y = var_11353_promoted_to_fp16)[name = string("op_11359_cast_fp16")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_11359_cast_fp16)[name = string("variance_cast_fp16")]; tensor const_281_to_fp16 = const()[name = string("const_281_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440676288)))]; tensor var_11363_cast_fp16 = mul(x = const_281_to_fp16, y = hidden_states_449_cast_fp16)[name = string("op_11363_cast_fp16")]; fp16 var_11364_to_fp16 = const()[name = string("op_11364_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_11365_cast_fp16 = add(x = variance_cast_fp16, y = var_11364_to_fp16)[name = string("op_11365_cast_fp16")]; fp32 var_11366_epsilon_0 = const()[name = string("op_11366_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_11366_cast_fp16 = rsqrt(epsilon = var_11366_epsilon_0, x = var_11365_cast_fp16)[name = string("op_11366_cast_fp16")]; tensor input_cast_fp16 = mul(x = var_11363_cast_fp16, y = var_11366_cast_fp16)[name = string("input_cast_fp16")]; tensor codec_head_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(440678400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443824192))))[name = string("codec_head_weight_to_fp16_palettized")]; tensor logits = linear(bias = linear_4_bias_0_to_fp16, weight = codec_head_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("linear_196_cast_fp16")]; tensor var_11375_axes_0 = const()[name = string("op_11375_axes_0"), val = tensor([0])]; tensor var_11375_cast_fp16 = squeeze(axes = var_11375_axes_0, x = hidden_states_449_cast_fp16)[name = string("op_11375_cast_fp16")]; tensor var_11377_axes_0 = const()[name = string("op_11377_axes_0"), val = tensor([-1])]; tensor var_11377_cast_fp16 = expand_dims(axes = var_11377_axes_0, x = var_11375_cast_fp16)[name = string("op_11377_cast_fp16")]; tensor var_11379_axes_0 = const()[name = string("op_11379_axes_0"), val = tensor([-1])]; tensor hidden_states = expand_dims(axes = var_11379_axes_0, x = var_11377_cast_fp16)[name = string("op_11379_cast_fp16")]; int32 var_11381 = const()[name = string("op_11381"), val = int32(1)]; bool new_kv_k_interleave_0 = const()[name = string("new_kv_k_interleave_0"), val = bool(false)]; tensor new_kv_k_cast_fp16 = concat(axis = var_11381, interleave = new_kv_k_interleave_0, values = (nk_1_cast_fp16, nk_3_cast_fp16, nk_5_cast_fp16, nk_7_cast_fp16, nk_9_cast_fp16, nk_11_cast_fp16, nk_13_cast_fp16, nk_15_cast_fp16, nk_17_cast_fp16, nk_19_cast_fp16, nk_21_cast_fp16, nk_23_cast_fp16, nk_25_cast_fp16, nk_27_cast_fp16, nk_29_cast_fp16, nk_31_cast_fp16, nk_33_cast_fp16, nk_35_cast_fp16, nk_37_cast_fp16, nk_39_cast_fp16, nk_41_cast_fp16, nk_43_cast_fp16, nk_45_cast_fp16, nk_47_cast_fp16, nk_49_cast_fp16, nk_51_cast_fp16, nk_53_cast_fp16, nk_cast_fp16))[name = string("new_kv_k_cast_fp16")]; int32 var_11384 = const()[name = string("op_11384"), val = int32(1)]; bool new_kv_v_interleave_0 = const()[name = string("new_kv_v_interleave_0"), val = bool(false)]; tensor new_kv_v_cast_fp16 = concat(axis = var_11384, interleave = new_kv_v_interleave_0, values = (nv_1_cast_fp16, nv_3_cast_fp16, nv_5_cast_fp16, nv_7_cast_fp16, nv_9_cast_fp16, nv_11_cast_fp16, nv_13_cast_fp16, nv_15_cast_fp16, nv_17_cast_fp16, nv_19_cast_fp16, nv_21_cast_fp16, nv_23_cast_fp16, nv_25_cast_fp16, nv_27_cast_fp16, nv_29_cast_fp16, nv_31_cast_fp16, nv_33_cast_fp16, nv_35_cast_fp16, nv_37_cast_fp16, nv_39_cast_fp16, nv_41_cast_fp16, nv_43_cast_fp16, nv_45_cast_fp16, nv_47_cast_fp16, nv_49_cast_fp16, nv_51_cast_fp16, nv_53_cast_fp16, nv_cast_fp16))[name = string("new_kv_v_cast_fp16")]; tensor var_11389_cast_fp16 = mul(x = key_cache, y = var_1203_cast_fp16)[name = string("op_11389_cast_fp16")]; tensor var_11390_cast_fp16 = mul(x = new_kv_k_cast_fp16, y = update_mask_cast_fp16)[name = string("op_11390_cast_fp16")]; tensor new_key_cache = add(x = var_11389_cast_fp16, y = var_11390_cast_fp16)[name = string("op_11392_cast_fp16")]; tensor var_11396_cast_fp16 = mul(x = value_cache, y = var_1203_cast_fp16)[name = string("op_11396_cast_fp16")]; tensor var_11397_cast_fp16 = mul(x = new_kv_v_cast_fp16, y = update_mask_cast_fp16)[name = string("op_11397_cast_fp16")]; tensor new_value_cache = add(x = var_11396_cast_fp16, y = var_11397_cast_fp16)[name = string("op_11399_cast_fp16")]; } -> (logits, hidden_states, new_key_cache, new_value_cache); }